formal-model/urcu-controldataflow-alpha-ipi-progress-minimal/urcu.spin
/*
 * urcu.spin: Promela code to validate memory barriers with OOO memory
 * and out-of-order instruction scheduling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (c) 2009 Mathieu Desnoyers
 */

/* Promela validation variables. */

/* specific defines "included" here */
/* DEFINES file "included" here */

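/*
 * Illustrative verification recipe (a sketch; the macros referenced below,
 * such as SLAB_SIZE, WINE, POISON and RCU_GP_CTR_BIT, come from the DEFINES
 * file mentioned above, and the real flags live in this model's Makefile,
 * so treat these exact command lines as assumptions):
 *
 *   spin -a -DARCH_ALPHA -DWRITER_PROGRESS urcu.spin  # generate pan.c
 *   cc -O2 -DNP -o pan pan.c                          # NP: non-progress cycle search
 *   ./pan -l -f                                       # -l: find non-progress cycles,
 *                                                     # -f: weak fairness
 */
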
#define NR_READERS 1
#define NR_WRITERS 1

#define NR_PROCS 2

#define get_pid()       (_pid)

#define get_readerid()  (get_pid())

/*
 * Produced process control and data flow. Updated after each instruction to
 * show which variables are ready. One-hot bit encoding is used per variable
 * to save state space. The bits serve as triggers to execute the
 * instructions having those variables as input. Leaving bits active inhibits
 * instruction execution. This scheme makes instruction disabling and
 * dependency fall-back automatic.
 */

#define CONSUME_TOKENS(state, bits, notbits)                    \
        ((!(state & (notbits))) && (state & (bits)) == (bits))

#define PRODUCE_TOKENS(state, bits)                             \
        state = state | (bits);

#define CLEAR_TOKENS(state, bits)                               \
        state = state & ~(bits)

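/*
 * A minimal sketch (illustrative only, not part of the model) of how the
 * three token macros cooperate. An "instruction" fires once all of its input
 * tokens are present and its own output token is absent; producing the
 * output token then inhibits re-execution:
 *
 *   #define TOK_A  (1 << 0)
 *   #define TOK_B  (1 << 1)
 *   int flow = 0;
 *   ...
 *   if
 *   :: CONSUME_TOKENS(flow, 0, TOK_A) ->       // no inputs, A not done yet
 *           // ... execute instruction A ...
 *           PRODUCE_TOKENS(flow, TOK_A);
 *   :: CONSUME_TOKENS(flow, TOK_A, TOK_B) ->   // needs A's result, B not done
 *           // ... execute instruction B ...
 *           PRODUCE_TOKENS(flow, TOK_B);
 *   :: CONSUME_TOKENS(flow, TOK_A | TOK_B, 0) ->
 *           CLEAR_TOKENS(flow, TOK_A | TOK_B); // retire, allow the next round
 *   fi;
 */
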
/*
 * Types of dependency:
 *
 * Data dependency
 *
 * - True dependency, Read-after-Write (RAW)
 *
 * This type of dependency happens when a statement depends on the result of a
 * previous statement. This applies to any statement which needs to read a
 * variable written by a preceding statement.
 *
 * - False dependency, Write-after-Read (WAR)
 *
 * Typically, variable renaming can ensure that this dependency goes away.
 * However, if the statements must read from and then write to the same
 * variable in the OOO memory model, renaming may be impossible, and this
 * therefore causes a WAR dependency.
 *
 * - Output dependency, Write-after-Write (WAW)
 *
 * Two writes to the same variable in subsequent statements. Variable renaming
 * can usually ensure this dependency does not arise, but a WAW dependency
 * remains when multiple writes target the same OOO memory model variable.
 *
 * Control dependency
 *
 * Execution of a given instruction depends on a previous instruction
 * evaluating in a way that allows its execution, e.g. branches.
 *
 * Useful considerations for joining dependencies after branch
 *
 * - Pre-dominance
 *
 * "We say box i dominates box j if every path (leading from input to output
 * through the diagram) which passes through box j must also pass through box
 * i. Thus box i dominates box j if box j is subordinate to box i in the
 * program."
 *
 * http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
 * Another classic algorithm to compute dominance: Lengauer-Tarjan (in gcc).
 *
 * - Post-dominance
 *
 * Just as pre-dominance, but with the arcs of the data flow inverted, and
 * input vs output exchanged. Therefore, i post-dominating j ensures that
 * every path passing by j will pass by i before reaching the output.
 *
 * Prefetch and speculative execution
 *
 * If an instruction depends on the result of a previous branch, but it does
 * not have side-effects, it can be executed before the branch result is
 * known. However, it must be restarted if a core-synchronizing instruction
 * is issued. Note that instructions which depend on the speculative
 * instruction's result but that have side-effects must depend on the branch
 * completion in addition to the speculatively executed instruction.
 *
 * Other considerations
 *
 * Note about the "volatile" keyword dependency: the compiler will order
 * volatile accesses so they appear in the right order on a given CPU. They
 * can still be reordered by the CPU instruction scheduling, so this cannot
 * be considered a dependency.
 *
 * References:
 *
 * Cooper, Keith D.; & Torczon, Linda. (2005). Engineering a Compiler. Morgan
 * Kaufmann. ISBN 1-55860-698-X.
 * Kennedy, Ken; & Allen, Randy. (2001). Optimizing Compilers for Modern
 * Architectures: A Dependence-based Approach. Morgan Kaufmann. ISBN
 * 1-55860-286-0.
 * Muchnick, Steven S. (1997). Advanced Compiler Design and Implementation.
 * Morgan Kaufmann. ISBN 1-55860-320-4.
 */
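
/*
 * Concrete one-liners for the three data dependencies above, in terms of
 * this model's cached accesses (illustrative only):
 *
 *   RAW:  WRITE_CACHED_VAR(x, 1);  a = READ_CACHED_VAR(x);   // read after write
 *   WAR:  a = READ_CACHED_VAR(x);  WRITE_CACHED_VAR(x, 1);   // write after read
 *   WAW:  WRITE_CACHED_VAR(x, 1);  WRITE_CACHED_VAR(x, 2);   // write after write
 *
 * In the token scheme, each case is encoded by making the second instruction
 * CONSUME the token PRODUCEd by the first.
 */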

/*
 * Note about loops and nested calls
 *
 * To keep this model simple, loops expressed in the framework behave as if
 * there were a core-synchronizing instruction between loops. To see the
 * effect of loop unrolling, loops must be unrolled manually. Note that if
 * loops end or start with a core-synchronizing instruction, the model is
 * accurate. Nested calls are not supported.
 */

/*
 * Only Alpha has out-of-order cache bank loads. Other architectures (Intel,
 * PowerPC, ARM) ensure that dependent reads won't be reordered; cf.
 * http://www.linuxjournal.com/article/8212
 */
#ifdef ARCH_ALPHA
#define HAVE_OOO_CACHE_READ
#endif

/*
 * Each process has its own data in its cache. Caches are randomly updated.
 * smp_wmb and smp_rmb force cache updates (write and read, respectively);
 * smp_mb forces both.
 */

typedef per_proc_byte {
        byte val[NR_PROCS];
};

typedef per_proc_bit {
        bit val[NR_PROCS];
};

/* Bitfield has a maximum of 8 procs */
typedef per_proc_bitfield {
        byte bitfield;
};

#define DECLARE_CACHED_VAR(type, x)     \
        type mem_##x;

#define DECLARE_PROC_CACHED_VAR(type, x)\
        type cached_##x;                \
        bit cache_dirty_##x;

#define INIT_CACHED_VAR(x, v)           \
        mem_##x = v;

#define INIT_PROC_CACHED_VAR(x, v)      \
        cache_dirty_##x = 0;            \
        cached_##x = v;

#define IS_CACHE_DIRTY(x, id)   (cache_dirty_##x)

#define READ_CACHED_VAR(x)      (cached_##x)

#define WRITE_CACHED_VAR(x, v)          \
        atomic {                        \
                cached_##x = v;         \
                cache_dirty_##x = 1;    \
        }

#define CACHE_WRITE_TO_MEM(x, id)       \
        if                              \
        :: IS_CACHE_DIRTY(x, id) ->     \
                mem_##x = cached_##x;   \
                cache_dirty_##x = 0;    \
        :: else ->                      \
                skip                    \
        fi;

#define CACHE_READ_FROM_MEM(x, id)      \
        if                              \
        :: !IS_CACHE_DIRTY(x, id) ->    \
                cached_##x = mem_##x;   \
        :: else ->                      \
                skip                    \
        fi;

/*
 * May or may not propagate the cache to/from memory, modeling random cache
 * write-back and refill.
 */
#define RANDOM_CACHE_WRITE_TO_MEM(x, id)\
        if                              \
        :: 1 -> CACHE_WRITE_TO_MEM(x, id);      \
        :: 1 -> skip                    \
        fi;

#define RANDOM_CACHE_READ_FROM_MEM(x, id)\
        if                              \
        :: 1 -> CACHE_READ_FROM_MEM(x, id);     \
        :: 1 -> skip                    \
        fi;
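
/*
 * Putting the cache macros together for a single variable (a sketch, not
 * part of the model; "x" is a placeholder name):
 *
 *   DECLARE_CACHED_VAR(byte, x);        // global memory copy: mem_x
 *   DECLARE_PROC_CACHED_VAR(byte, x);   // per-process copy + dirty bit
 *   INIT_CACHED_VAR(x, 0);
 *   INIT_PROC_CACHED_VAR(x, 0);
 *   ...
 *   WRITE_CACHED_VAR(x, 1);             // updates the local cache, marks it dirty
 *   CACHE_WRITE_TO_MEM(x, get_pid());   // write-back: mem_x = 1, dirty bit cleared
 *   CACHE_READ_FROM_MEM(x, get_pid());  // refill, only taken if the cache is clean
 *   assert(READ_CACHED_VAR(x) == 1);
 */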

/* Must consume all prior read tokens. All subsequent reads depend on it. */
inline smp_rmb(i)
{
        atomic {
                CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
                i = 0;
                do
                :: i < NR_READERS ->
                        CACHE_READ_FROM_MEM(urcu_active_readers[i], get_pid());
                        i++
                :: i >= NR_READERS -> break
                od;
                CACHE_READ_FROM_MEM(rcu_ptr, get_pid());
                i = 0;
                do
                :: i < SLAB_SIZE ->
                        CACHE_READ_FROM_MEM(rcu_data[i], get_pid());
                        i++
                :: i >= SLAB_SIZE -> break
                od;
        }
}

/* Must consume all prior write tokens. All subsequent writes depend on it. */
inline smp_wmb(i)
{
        atomic {
                CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
                i = 0;
                do
                :: i < NR_READERS ->
                        CACHE_WRITE_TO_MEM(urcu_active_readers[i], get_pid());
                        i++
                :: i >= NR_READERS -> break
                od;
                CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());
                i = 0;
                do
                :: i < SLAB_SIZE ->
                        CACHE_WRITE_TO_MEM(rcu_data[i], get_pid());
                        i++
                :: i >= SLAB_SIZE -> break
                od;
        }
}

/*
 * Synchronization point. Must consume all prior read and write tokens. All
 * subsequent reads and writes depend on it.
 */
inline smp_mb(i)
{
        atomic {
                smp_wmb(i);
                smp_rmb(i);
        }
}

#ifdef REMOTE_BARRIERS

bit reader_barrier[NR_READERS];

/*
 * We cannot leave the barrier dependencies in place in REMOTE_BARRIERS mode,
 * because they would add core synchronization that does not exist, and would
 * therefore make the model incomplete. Instead, we model the read-side memory
 * barriers by completely disabling the memory barriers and their dependencies
 * on the read side. One at a time (in different verification runs), we make a
 * different instruction listen for signals.
 */

#define smp_mb_reader(i, j)

/*
 * Service 0, 1 or many barrier requests.
 */
inline smp_mb_recv(i, j)
{
        do
        :: (reader_barrier[get_readerid()] == 1) ->
                /*
                 * We choose to ignore cycles caused by the writer
                 * busy-looping (waiting for the reader and sending barrier
                 * requests) while the reader keeps servicing those requests
                 * without ever continuing its own execution.
                 */
progress_ignoring_mb1:
                smp_mb(i);
                reader_barrier[get_readerid()] = 0;
        :: 1 ->
                /*
                 * We choose to ignore the writer's non-progress caused by
                 * the reader ignoring the writer's mb() requests.
                 */
progress_ignoring_mb2:
                break;
        od;
}

#define PROGRESS_LABEL(progressid)      progress_writer_progid_##progressid:

#define smp_mb_send(i, j, progressid)                                   \
{                                                                       \
        smp_mb(i);                                                      \
        i = 0;                                                          \
        do                                                              \
        :: i < NR_READERS ->                                            \
                reader_barrier[i] = 1;                                  \
                /*                                                      \
                 * Busy-looping waiting for reader barrier handling is  \
                 * of little interest, given the reader has the ability \
                 * to totally ignore barrier requests.                  \
                 */                                                     \
                do                                                      \
                :: (reader_barrier[i] == 1) ->                          \
                        PROGRESS_LABEL(progressid)                      \
                        skip;                                           \
                :: (reader_barrier[i] == 0) -> break;                   \
                od;                                                     \
                i++;                                                    \
        :: i >= NR_READERS ->                                           \
                break                                                   \
        od;                                                             \
        smp_mb(i);                                                      \
}

#else

#define smp_mb_send(i, j, progressid)   smp_mb(i)
#define smp_mb_reader(i, j)             smp_mb(i)
#define smp_mb_recv(i, j)

#endif

/* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
DECLARE_CACHED_VAR(byte, urcu_gp_ctr);
/* Note! Currently only one reader */
DECLARE_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
/* RCU data */
DECLARE_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);

/* RCU pointer */
#if (SLAB_SIZE == 2)
DECLARE_CACHED_VAR(bit, rcu_ptr);
bit ptr_read_first[NR_READERS];
#else
DECLARE_CACHED_VAR(byte, rcu_ptr);
byte ptr_read_first[NR_READERS];
#endif

bit data_read_first[NR_READERS];

bit init_done = 0;

inline wait_init_done()
{
        do
        :: init_done == 0 -> skip;
        :: else -> break;
        od;
}

inline ooo_mem(i)
{
        atomic {
                RANDOM_CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
                i = 0;
                do
                :: i < NR_READERS ->
                        RANDOM_CACHE_WRITE_TO_MEM(urcu_active_readers[i],
                                get_pid());
                        i++
                :: i >= NR_READERS -> break
                od;
                RANDOM_CACHE_WRITE_TO_MEM(rcu_ptr, get_pid());
                i = 0;
                do
                :: i < SLAB_SIZE ->
                        RANDOM_CACHE_WRITE_TO_MEM(rcu_data[i], get_pid());
                        i++
                :: i >= SLAB_SIZE -> break
                od;
#ifdef HAVE_OOO_CACHE_READ
                RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
                i = 0;
                do
                :: i < NR_READERS ->
                        RANDOM_CACHE_READ_FROM_MEM(urcu_active_readers[i],
                                get_pid());
                        i++
                :: i >= NR_READERS -> break
                od;
                RANDOM_CACHE_READ_FROM_MEM(rcu_ptr, get_pid());
                i = 0;
                do
                :: i < SLAB_SIZE ->
                        RANDOM_CACHE_READ_FROM_MEM(rcu_data[i], get_pid());
                        i++
                :: i >= SLAB_SIZE -> break
                od;
#else
                smp_rmb(i);
#endif /* HAVE_OOO_CACHE_READ */
        }
}

/*
 * Bit encoding, urcu_reader:
 */

int _proc_urcu_reader;
#define proc_urcu_reader        _proc_urcu_reader

/* Body of PROCEDURE_READ_LOCK */
#define READ_PROD_A_READ                (1 << 0)
#define READ_PROD_B_IF_TRUE             (1 << 1)
#define READ_PROD_B_IF_FALSE            (1 << 2)
#define READ_PROD_C_IF_TRUE_READ        (1 << 3)

#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \
        :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), \
                          READ_PROD_A_READ << base) -> \
                ooo_mem(i); \
                tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
                PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \
        :: CONSUME_TOKENS(proc_urcu_reader, \
                          READ_PROD_A_READ << base,    /* RAW, pre-dominant */ \
                          (READ_PROD_B_IF_TRUE | READ_PROD_B_IF_FALSE) << base) -> \
                if \
                :: (!(tmp & RCU_GP_CTR_NEST_MASK)) -> \
                        PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base); \
                :: else -> \
                        PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \
                fi; \
        /* IF TRUE */ \
        :: CONSUME_TOKENS(proc_urcu_reader, consumetoken,       /* prefetch */ \
                          READ_PROD_C_IF_TRUE_READ << base) -> \
                ooo_mem(i); \
                tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \
                PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \
        :: CONSUME_TOKENS(proc_urcu_reader, \
                          (READ_PROD_B_IF_TRUE \
                          | READ_PROD_C_IF_TRUE_READ    /* pre-dominant */ \
                          | READ_PROD_A_READ) << base,  /* WAR */ \
                          producetoken) -> \
                ooo_mem(i); \
                WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2); \
                PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
                /* IF_MERGE implies \
                 * post-dominance */ \
        /* ELSE */ \
        :: CONSUME_TOKENS(proc_urcu_reader, \
                          (READ_PROD_B_IF_FALSE         /* pre-dominant */ \
                          | READ_PROD_A_READ) << base,  /* WAR */ \
                          producetoken) -> \
                ooo_mem(i); \
                WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], \
                                 tmp + 1); \
                PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
                /* IF_MERGE implies \
                 * post-dominance */ \
        /* ENDIF */ \
        skip

/* Body of PROCEDURE_READ_UNLOCK */
#define READ_PROC_READ_UNLOCK           (1 << 0)

#define PROCEDURE_READ_UNLOCK(base, consumetoken, producetoken) \
        :: CONSUME_TOKENS(proc_urcu_reader, \
                          consumetoken, \
                          READ_PROC_READ_UNLOCK << base) -> \
                ooo_mem(i); \
                tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \
        :: CONSUME_TOKENS(proc_urcu_reader, \
                          consumetoken \
                          | (READ_PROC_READ_UNLOCK << base),    /* WAR */ \
                          producetoken) -> \
                ooo_mem(i); \
                WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \
                PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
        skip


#define READ_PROD_NONE                  (1 << 0)

/* PROCEDURE_READ_LOCK base = << 1 : uses bits 1 to 5 */
#define READ_LOCK_BASE                  1
#define READ_LOCK_OUT                   (1 << 5)

#define READ_PROC_FIRST_MB              (1 << 6)

#define READ_PROC_READ_GEN              (1 << 12)
#define READ_PROC_ACCESS_GEN            (1 << 13)

#define READ_PROC_SECOND_MB             (1 << 16)

/* PROCEDURE_READ_UNLOCK base = << 17 : uses bits 17 to 18 */
#define READ_UNLOCK_BASE                17
#define READ_UNLOCK_OUT                 (1 << 18)

/* Should not include branches */
#define READ_PROC_ALL_TOKENS            (READ_PROD_NONE \
                                        | READ_LOCK_OUT \
                                        | READ_PROC_FIRST_MB \
                                        | READ_PROC_READ_GEN \
                                        | READ_PROC_ACCESS_GEN \
                                        | READ_PROC_SECOND_MB \
                                        | READ_UNLOCK_OUT)

/* Must clear all tokens, including branches */
#define READ_PROC_ALL_TOKENS_CLEAR      ((1 << 30) - 1)
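
/*
 * Worked example of the encoding above (illustrative): with READ_LOCK_BASE
 * = 1, the read-lock body occupies bits 1 to 4 and completes by producing
 * READ_LOCK_OUT (bit 5, 0x20). The guard
 *
 *   CONSUME_TOKENS(proc_urcu_reader, READ_LOCK_OUT, READ_PROC_FIRST_MB)
 *
 * is true once bit 5 is set and while bit 6 (READ_PROC_FIRST_MB, 0x40) is
 * still clear, e.g. for proc_urcu_reader == 0x37 (reached when the lock's
 * "true" branch has run), and becomes false as soon as the first barrier
 * produces bit 6.
 */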

inline urcu_one_read(i, j, nest_i, tmp, tmp2)
{
        PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_NONE);

#ifdef NO_MB
        PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
        PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
#endif

#ifdef REMOTE_BARRIERS
        PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
        PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);
#endif

        do
        :: 1 ->

#ifdef REMOTE_BARRIERS
                /*
                 * A signal-based memory barrier can only take effect at a
                 * point where execution appears to be in program order.
                 */
                if
                :: 1 ->
                        atomic {
                                if
                                :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE,
                                                  READ_LOCK_OUT
                                                  | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN
                                                  | READ_UNLOCK_OUT)
                                || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE
                                                  | READ_LOCK_OUT,
                                                  READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN
                                                  | READ_UNLOCK_OUT)
                                || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE
                                                  | READ_LOCK_OUT
                                                  | READ_PROC_READ_GEN, READ_PROC_ACCESS_GEN
                                                  | READ_UNLOCK_OUT)
                                || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE
                                                  | READ_LOCK_OUT
                                                  | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN,
                                                  READ_UNLOCK_OUT)
                                || CONSUME_TOKENS(proc_urcu_reader, READ_PROD_NONE
                                                  | READ_LOCK_OUT
                                                  | READ_PROC_READ_GEN | READ_PROC_ACCESS_GEN
                                                  | READ_UNLOCK_OUT, 0) ->
                                        goto non_atomic3;
non_atomic3_end:
                                        skip;
                                fi;
                        }
                fi;

                goto non_atomic3_skip;
non_atomic3:
                smp_mb_recv(i, j);
                goto non_atomic3_end;
non_atomic3_skip:

#endif /* REMOTE_BARRIERS */

                atomic {
                        if
                        PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT);

                        :: CONSUME_TOKENS(proc_urcu_reader,
                                          READ_LOCK_OUT,        /* post-dominant */
                                          READ_PROC_FIRST_MB) ->
                                smp_mb_reader(i, j);
                                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);

                        :: CONSUME_TOKENS(proc_urcu_reader,
                                          READ_PROC_FIRST_MB,   /* mb() orders reads */
                                          READ_PROC_READ_GEN) ->
                                ooo_mem(i);
                                ptr_read_first[get_readerid()] = READ_CACHED_VAR(rcu_ptr);
                                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN);

                        :: CONSUME_TOKENS(proc_urcu_reader,
                                          READ_PROC_FIRST_MB    /* mb() orders reads */
                                          | READ_PROC_READ_GEN,
                                          READ_PROC_ACCESS_GEN) ->
                                /* smp_read_barrier_depends */
                                goto rmb1;
rmb1_end:
                                data_read_first[get_readerid()] =
                                        READ_CACHED_VAR(rcu_data[ptr_read_first[get_readerid()]]);
                                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN);

                        :: CONSUME_TOKENS(proc_urcu_reader,
                                          READ_PROC_ACCESS_GEN  /* mb() orders reads */
                                          | READ_PROC_READ_GEN  /* mb() orders reads */
                                          | READ_PROC_FIRST_MB  /* mb() ordered */
                                          | READ_LOCK_OUT,      /* post-dominant */
                                          READ_PROC_SECOND_MB) ->
                                smp_mb_reader(i, j);
                                PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_SECOND_MB);

                        PROCEDURE_READ_UNLOCK(READ_UNLOCK_BASE,
                                              READ_PROC_SECOND_MB /* mb() orders reads */
                                              | READ_PROC_FIRST_MB /* mb() orders reads */
                                              | READ_LOCK_OUT,  /* RAW */
                                              READ_UNLOCK_OUT);

                        :: CONSUME_TOKENS(proc_urcu_reader, READ_PROC_ALL_TOKENS, 0) ->
                                CLEAR_TOKENS(proc_urcu_reader, READ_PROC_ALL_TOKENS_CLEAR);
                                break;
                        fi;
                }
        od;
        /*
         * Dependency between consecutive loops:
         * RAW dependency on
         * WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1)
         * tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);
         * between loops.
         * _WHEN THE MB()s are in place_, they add full ordering of the
         * generation pointer read wrt the active reader count read, which
         * ensures execution will not spill across loop executions.
         * However, in the event the mb()s are removed (execution using a
         * signal handler to promote barrier() to smp_mb()), nothing prevents
         * one loop from spilling its execution into the other loop's
         * execution.
         */
        goto end;
rmb1:
#ifndef NO_RMB
        smp_rmb(i);
#else
        ooo_mem(i);
#endif
        goto rmb1_end;
end:
        skip;
}


active proctype urcu_reader()
{
        byte i, j, nest_i;
        byte tmp, tmp2;

        /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
        DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
        /* Note! Currently only one reader */
        DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
        /* RCU data */
        DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);

        /* RCU pointer */
#if (SLAB_SIZE == 2)
        DECLARE_PROC_CACHED_VAR(bit, rcu_ptr);
#else
        DECLARE_PROC_CACHED_VAR(byte, rcu_ptr);
#endif

        atomic {
                INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
                INIT_PROC_CACHED_VAR(rcu_ptr, 0);

                i = 0;
                do
                :: i < NR_READERS ->
                        INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0);
                        i++;
                :: i >= NR_READERS -> break
                od;
                INIT_PROC_CACHED_VAR(rcu_data[0], WINE);
                i = 1;
                do
                :: i < SLAB_SIZE ->
                        INIT_PROC_CACHED_VAR(rcu_data[i], POISON);
                        i++
                :: i >= SLAB_SIZE -> break
                od;
        }

        wait_init_done();

        assert(get_pid() < NR_PROCS);

end_reader:
        do
        :: 1 ->
                /*
                 * We do not test the reader's progress here, because we are
                 * mainly interested in the writer's progress. The reader
                 * never blocks anyway. We have to test reader and writer
                 * progress separately; otherwise we could think the writer
                 * is making progress when it is in fact blocked by an
                 * always-progressing reader.
                 */
#ifdef READER_PROGRESS
progress_reader:
#endif
                urcu_one_read(i, j, nest_i, tmp, tmp2);
        od;
}

/* no name clash please */
#undef proc_urcu_reader


/* Model the RCU update process. */

/*
 * Bit encoding, urcu_writer:
 * Currently only supports one reader.
 */

int _proc_urcu_writer;
#define proc_urcu_writer        _proc_urcu_writer

#define WRITE_PROD_NONE                 (1 << 0)

#define WRITE_DATA                      (1 << 1)
#define WRITE_PROC_WMB                  (1 << 2)
#define WRITE_XCHG_PTR                  (1 << 3)

#define WRITE_PROC_FIRST_MB             (1 << 4)

/* first flip */
#define WRITE_PROC_FIRST_READ_GP        (1 << 5)
#define WRITE_PROC_FIRST_WRITE_GP       (1 << 6)
#define WRITE_PROC_FIRST_WAIT           (1 << 7)
#define WRITE_PROC_FIRST_WAIT_LOOP      (1 << 8)

/* second flip */
#define WRITE_PROC_SECOND_READ_GP       (1 << 9)
#define WRITE_PROC_SECOND_WRITE_GP      (1 << 10)
#define WRITE_PROC_SECOND_WAIT          (1 << 11)
#define WRITE_PROC_SECOND_WAIT_LOOP     (1 << 12)

#define WRITE_PROC_SECOND_MB            (1 << 13)

#define WRITE_FREE                      (1 << 14)

#define WRITE_PROC_ALL_TOKENS           (WRITE_PROD_NONE \
                                        | WRITE_DATA \
                                        | WRITE_PROC_WMB \
                                        | WRITE_XCHG_PTR \
                                        | WRITE_PROC_FIRST_MB \
                                        | WRITE_PROC_FIRST_READ_GP \
                                        | WRITE_PROC_FIRST_WRITE_GP \
                                        | WRITE_PROC_FIRST_WAIT \
                                        | WRITE_PROC_SECOND_READ_GP \
                                        | WRITE_PROC_SECOND_WRITE_GP \
                                        | WRITE_PROC_SECOND_WAIT \
                                        | WRITE_PROC_SECOND_MB \
                                        | WRITE_FREE)

#define WRITE_PROC_ALL_TOKENS_CLEAR     ((1 << 15) - 1)

/*
 * Mutexes are implied around writer execution. A single writer at a time.
 */
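
/*
 * With NR_WRITERS == 1, no lock needs to be modeled. A hypothetical sketch
 * of an explicit writer mutex, if several writers were ever added (not used
 * anywhere in this model):
 *
 *   bit writer_lock = 0;
 *   ...
 *   atomic { (writer_lock == 0) -> writer_lock = 1 };  // lock (blocks until free)
 *   ...critical section...
 *   writer_lock = 0;                                   // unlock
 */
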
active proctype urcu_writer()
{
        byte i, j;
        byte tmp, tmp2, tmpa;
        byte cur_data = 0, old_data, loop_nr = 0;
        byte cur_gp_val = 0;    /*
                                 * Keep a local trace of the current parity so
                                 * we don't add nonexistent dependencies on the
                                 * global GP update. Needed to test the single
                                 * flip case.
                                 */

        /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */
        DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr);
        /* Note! Currently only one reader */
        DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]);
        /* RCU data */
        DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]);

        /* RCU pointer */
#if (SLAB_SIZE == 2)
        DECLARE_PROC_CACHED_VAR(bit, rcu_ptr);
#else
        DECLARE_PROC_CACHED_VAR(byte, rcu_ptr);
#endif

        atomic {
                INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1);
                INIT_PROC_CACHED_VAR(rcu_ptr, 0);

                i = 0;
                do
                :: i < NR_READERS ->
                        INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0);
                        i++;
                :: i >= NR_READERS -> break
                od;
                INIT_PROC_CACHED_VAR(rcu_data[0], WINE);
                i = 1;
                do
                :: i < SLAB_SIZE ->
                        INIT_PROC_CACHED_VAR(rcu_data[i], POISON);
                        i++
                :: i >= SLAB_SIZE -> break
                od;
        }


        wait_init_done();

        assert(get_pid() < NR_PROCS);

        do
        :: (loop_nr < 3) ->
#ifdef WRITER_PROGRESS
progress_writer1:
#endif
                loop_nr = loop_nr + 1;

                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROD_NONE);

#ifdef NO_WMB
                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);
#endif

#ifdef NO_MB
                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB);
                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB);
#endif

#ifdef SINGLE_FLIP
                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT);
                /* For a single flip, we need to know the current parity */
                cur_gp_val = cur_gp_val ^ RCU_GP_CTR_BIT;
#endif

                do :: 1 ->
                atomic {
                if

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_PROD_NONE,
                                  WRITE_DATA) ->
                        ooo_mem(i);
                        cur_data = (cur_data + 1) % SLAB_SIZE;
                        WRITE_CACHED_VAR(rcu_data[cur_data], WINE);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_DATA);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_DATA,
                                  WRITE_PROC_WMB) ->
                        smp_wmb(i);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_PROC_WMB,
                                  WRITE_XCHG_PTR) ->
                        /* rcu_xchg_pointer() */
                        atomic {
                                old_data = READ_CACHED_VAR(rcu_ptr);
                                WRITE_CACHED_VAR(rcu_ptr, cur_data);
                        }
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_XCHG_PTR);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR,
                                  WRITE_PROC_FIRST_MB) ->
                        goto smp_mb_send1;
smp_mb_send1_end:
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB);

                /* first flip */
                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_PROC_FIRST_MB,
                                  WRITE_PROC_FIRST_READ_GP) ->
                        tmpa = READ_CACHED_VAR(urcu_gp_ctr);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_READ_GP);
                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_PROC_FIRST_MB | WRITE_PROC_WMB
                                  | WRITE_PROC_FIRST_READ_GP,
                                  WRITE_PROC_FIRST_WRITE_GP) ->
                        ooo_mem(i);
                        WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  //WRITE_PROC_FIRST_WRITE_GP |  /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_FIRST_MB,  /* can be reordered before/after flips */
                                  WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
                        ooo_mem(i);
                        //smp_mb(i);    /* TEST */
                        /* ONLY WAITING FOR READER 0 */
                        tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
#ifndef SINGLE_FLIP
                        /*
                         * In normal execution, we always start by waiting for
                         * the even parity.
                         */
                        cur_gp_val = RCU_GP_CTR_BIT;
#endif
                        if
                        :: (tmp2 & RCU_GP_CTR_NEST_MASK)
                           && ((tmp2 ^ cur_gp_val) & RCU_GP_CTR_BIT) ->
                                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT_LOOP);
                        :: else ->
                                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT);
                        fi;

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  //WRITE_PROC_FIRST_WRITE_GP    /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_FIRST_WAIT_LOOP
                                  | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR
                                  | WRITE_PROC_FIRST_MB,        /* can be reordered before/after flips */
                                  0) ->
#ifndef GEN_ERROR_WRITER_PROGRESS
                        goto smp_mb_send2;
smp_mb_send2_end:
                        /*
                         * The memory barrier will invalidate the second read,
                         * done as prefetching. Note that all instructions
                         * with side-effects depending on
                         * WRITE_PROC_SECOND_READ_GP should also depend on the
                         * completion of this busy-waiting loop.
                         */
                        CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
#else
                        ooo_mem(i);
#endif
                        /* This instruction loops to WRITE_PROC_FIRST_WAIT */
                        CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WAIT_LOOP | WRITE_PROC_FIRST_WAIT);

                /* second flip */
                :: CONSUME_TOKENS(proc_urcu_writer,
                                  //WRITE_PROC_FIRST_WAIT |      //test /* no dependency: could prefetch, no side-effect */
                                  WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_FIRST_MB,
                                  WRITE_PROC_SECOND_READ_GP) ->
                        ooo_mem(i);
                        //smp_mb(i);    /* TEST */
                        tmpa = READ_CACHED_VAR(urcu_gp_ctr);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_PROC_FIRST_WAIT         /* dependency on the first wait, because this
                                                                 * instruction has globally observable
                                                                 * side-effects.
                                                                 */
                                  | WRITE_PROC_FIRST_MB
                                  | WRITE_PROC_WMB
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_SECOND_READ_GP,
                                  WRITE_PROC_SECOND_WRITE_GP) ->
                        ooo_mem(i);
                        WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  //WRITE_PROC_FIRST_WRITE_GP |  /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_FIRST_WAIT
                                  | WRITE_PROC_FIRST_MB,        /* can be reordered before/after flips */
                                  WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
                        ooo_mem(i);
                        //smp_mb(i);    /* TEST */
                        /* ONLY WAITING FOR READER 0 */
                        tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
                        if
                        :: (tmp2 & RCU_GP_CTR_NEST_MASK)
                           && ((tmp2 ^ 0) & RCU_GP_CTR_BIT) ->
                                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP);
                        :: else ->
                                PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT);
                        fi;

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  //WRITE_PROC_FIRST_WRITE_GP |  /* TEST ADDING SYNC CORE */
                                  WRITE_PROC_SECOND_WRITE_GP
                                  | WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_SECOND_READ_GP
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_SECOND_WAIT_LOOP
                                  | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR
                                  | WRITE_PROC_FIRST_MB,        /* can be reordered before/after flips */
                                  0) ->
#ifndef GEN_ERROR_WRITER_PROGRESS
                        goto smp_mb_send3;
smp_mb_send3_end:
#else
                        ooo_mem(i);
#endif
                        /* This instruction loops to WRITE_PROC_SECOND_WAIT */
                        CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP | WRITE_PROC_SECOND_WAIT);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_PROC_FIRST_WAIT
                                  | WRITE_PROC_SECOND_WAIT
                                  | WRITE_PROC_FIRST_READ_GP
                                  | WRITE_PROC_SECOND_READ_GP
                                  | WRITE_PROC_FIRST_WRITE_GP
                                  | WRITE_PROC_SECOND_WRITE_GP
                                  | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR
                                  | WRITE_PROC_FIRST_MB,
                                  WRITE_PROC_SECOND_MB) ->
                        goto smp_mb_send4;
smp_mb_send4_end:
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB);

                :: CONSUME_TOKENS(proc_urcu_writer,
                                  WRITE_XCHG_PTR
                                  | WRITE_PROC_FIRST_WAIT
                                  | WRITE_PROC_SECOND_WAIT
                                  | WRITE_PROC_WMB      /* No dependency on
                                                         * WRITE_DATA because we
                                                         * write to a
                                                         * different location. */
                                  | WRITE_PROC_SECOND_MB
                                  | WRITE_PROC_FIRST_MB,
                                  WRITE_FREE) ->
                        WRITE_CACHED_VAR(rcu_data[old_data], POISON);
                        PRODUCE_TOKENS(proc_urcu_writer, WRITE_FREE);

                :: CONSUME_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS, 0) ->
                        CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS_CLEAR);
                        break;
                fi;
                }
                od;
                /*
                 * Note: the Promela model adds implicit serialization of the
                 * WRITE_FREE instruction. Normally, it would be permitted to
                 * spill over into the next loop execution. Given that the
                 * validation checks that the data entry read is poisoned, it
                 * is OK if we do not check "late arriving" memory poisoning.
                 */
        :: else -> break;
        od;
        /*
         * Since the reader loops forever, let the writer also busy-loop with
         * progress here, so that with weak fairness we can test the writer's
         * progress.
         */
end_writer:
        do
        :: 1 ->
#ifdef WRITER_PROGRESS
progress_writer2:
#endif
#ifdef READER_PROGRESS
                /*
                 * Make sure we don't block the reader's progress.
                 */
                smp_mb_send(i, j, 5);
#endif
                skip;
        od;

        /* Non-atomic parts of the loop */
        goto end;
smp_mb_send1:
        smp_mb_send(i, j, 1);
        goto smp_mb_send1_end;
#ifndef GEN_ERROR_WRITER_PROGRESS
smp_mb_send2:
        smp_mb_send(i, j, 2);
        goto smp_mb_send2_end;
smp_mb_send3:
        smp_mb_send(i, j, 3);
        goto smp_mb_send3_end;
#endif
smp_mb_send4:
        smp_mb_send(i, j, 4);
        goto smp_mb_send4_end;
end:
        skip;
}

/* no name clash please */
#undef proc_urcu_writer


/* Declared after the readers and writers so the pid count is correct. */
init {
        byte i, j;

        atomic {
                INIT_CACHED_VAR(urcu_gp_ctr, 1);
                INIT_CACHED_VAR(rcu_ptr, 0);

                i = 0;
                do
                :: i < NR_READERS ->
                        INIT_CACHED_VAR(urcu_active_readers[i], 0);
                        ptr_read_first[i] = 1;
                        data_read_first[i] = WINE;
                        i++;
                :: i >= NR_READERS -> break
                od;
                INIT_CACHED_VAR(rcu_data[0], WINE);
                i = 1;
                do
                :: i < SLAB_SIZE ->
                        INIT_CACHED_VAR(rcu_data[i], POISON);
                        i++
                :: i >= SLAB_SIZE -> break
                od;

                init_done = 1;
        }
}