X-Git-Url: http://git.liburcu.org/?a=blobdiff_plain;f=formal-model%2Furcu-controldataflow%2Furcu.spin;h=54752a17708dbec58c2872ca663c0afe7efe4c0b;hb=ba59a0c7b244a0939a2298fc76a9002436ef9674;hp=12f841ce46ed7225be2c8fe83b63d938483a00e6;hpb=caeea74ce58faba3ef2bb7d2bd925d9009803086;p=urcu.git diff --git a/formal-model/urcu-controldataflow/urcu.spin b/formal-model/urcu-controldataflow/urcu.spin index 12f841c..54752a1 100644 --- a/formal-model/urcu-controldataflow/urcu.spin +++ b/formal-model/urcu-controldataflow/urcu.spin @@ -98,6 +98,15 @@ * output exchanged. Therefore, i post-dominating j ensures that every path * passing by j will pass by i before reaching the output. * + * Prefetch and speculative execution + * + * If an instruction depends on the result of a previous branch, but it does not + * have side-effects, it can be executed before the branch result is known. + * however, it must be restarted if a core-synchronizing instruction is issued. + * Note that instructions which depend on the speculative instruction result + * but that have side-effects must depend on the branch completion in addition + * to the speculatively executed instruction. + * * Other considerations * * Note about "volatile" keyword dependency : The compiler will order volatile @@ -126,6 +135,15 @@ * Nested calls are not supported. */ +/* + * Only Alpha has out-of-order cache bank loads. Other architectures (intel, + * powerpc, arm) ensure that dependent reads won't be reordered. c.f. + * http://www.linuxjournal.com/article/8212) + */ +#ifdef ARCH_ALPHA +#define HAVE_OOO_CACHE_READ +#endif + /* * Each process have its own data in cache. Caches are randomly updated. * smp_wmb and smp_rmb forces cache updates (write and read), smp_mb forces @@ -206,7 +224,7 @@ typedef per_proc_bitfield { fi; /* Must consume all prior read tokens. All subsequent reads depend on it. */ -inline smp_rmb(i, j) +inline smp_rmb(i) { atomic { CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid()); @@ -229,7 +247,7 @@ inline smp_rmb(i, j) } /* Must consume all prior write tokens. All subsequent writes depend on it. */ -inline smp_wmb(i, j) +inline smp_wmb(i) { atomic { CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid()); @@ -253,11 +271,11 @@ inline smp_wmb(i, j) /* Synchronization point. Must consume all prior read and write tokens. All * subsequent reads and writes depend on it. */ -inline smp_mb(i, j) +inline smp_mb(i) { atomic { - smp_wmb(i, j); - smp_rmb(i, j); + smp_wmb(i); + smp_rmb(i); } } @@ -284,28 +302,29 @@ inline smp_mb_recv(i, j) { do :: (reader_barrier[get_readerid()] == 1) -> - smp_mb(i, j); + /* + * We choose to ignore cycles caused by writer busy-looping, + * waiting for the reader, sending barrier requests, and the + * reader always services them without continuing execution. + */ +progress_ignoring_mb1: + smp_mb(i); reader_barrier[get_readerid()] = 0; :: 1 -> - /* We choose to ignore writer's non-progress caused from the - * reader ignoring the writer's mb() requests */ -#ifdef WRITER_PROGRESS -progress_writer_from_reader: -#endif + /* + * We choose to ignore writer's non-progress caused by the + * reader ignoring the writer's mb() requests. + */ +progress_ignoring_mb2: break; od; } -#ifdef WRITER_PROGRESS -//#define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid: -#define PROGRESS_LABEL(progressid) -#else -#define PROGRESS_LABEL(progressid) -#endif +#define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid: #define smp_mb_send(i, j, progressid) \ { \ - smp_mb(i, j); \ + smp_mb(i); \ i = 0; \ do \ :: i < NR_READERS -> \ @@ -315,22 +334,23 @@ progress_writer_from_reader: * interest, given the reader has the ability to totally ignore \ * barrier requests. \ */ \ -PROGRESS_LABEL(progressid) \ do \ - :: (reader_barrier[i] == 1) -> skip; \ + :: (reader_barrier[i] == 1) -> \ +PROGRESS_LABEL(progressid) \ + skip; \ :: (reader_barrier[i] == 0) -> break; \ od; \ i++; \ :: i >= NR_READERS -> \ break \ od; \ - smp_mb(i, j); \ + smp_mb(i); \ } #else -#define smp_mb_send(i, j, progressid) smp_mb(i, j) -#define smp_mb_reader smp_mb +#define smp_mb_send(i, j, progressid) smp_mb(i) +#define smp_mb_reader(i, j) smp_mb(i) #define smp_mb_recv(i, j) #endif @@ -386,6 +406,7 @@ inline ooo_mem(i) i++ :: i >= SLAB_SIZE -> break od; +#ifdef HAVE_OOO_CACHE_READ RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid()); i = 0; do @@ -403,6 +424,9 @@ inline ooo_mem(i) i++ :: i >= SLAB_SIZE -> break od; +#else + smp_rmb(i); +#endif /* HAVE_OOO_CACHE_READ */ } } @@ -419,8 +443,8 @@ int _proc_urcu_reader; #define READ_PROD_B_IF_FALSE (1 << 2) #define READ_PROD_C_IF_TRUE_READ (1 << 3) -#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken) \ - :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) -> \ +#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \ + :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) -> \ ooo_mem(i); \ tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \ @@ -434,13 +458,14 @@ int _proc_urcu_reader; PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \ fi; \ /* IF TRUE */ \ - :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base, \ + :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */ \ READ_PROD_C_IF_TRUE_READ << base) -> \ ooo_mem(i); \ tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \ :: CONSUME_TOKENS(proc_urcu_reader, \ - (READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \ + (READ_PROD_B_IF_TRUE \ + | READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \ | READ_PROD_A_READ) << base, /* WAR */ \ producetoken) -> \ ooo_mem(i); \ @@ -470,14 +495,14 @@ int _proc_urcu_reader; consumetoken, \ READ_PROC_READ_UNLOCK << base) -> \ ooo_mem(i); \ - tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ + tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \ :: CONSUME_TOKENS(proc_urcu_reader, \ consumetoken \ | (READ_PROC_READ_UNLOCK << base), /* WAR */ \ producetoken) -> \ ooo_mem(i); \ - WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1); \ + WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \ PRODUCE_TOKENS(proc_urcu_reader, producetoken); \ skip @@ -641,7 +666,6 @@ non_atomic3_end: skip; fi; } - :: 1 -> skip; fi; goto non_atomic3_skip; @@ -654,7 +678,7 @@ non_atomic3_skip: atomic { if - PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT); + PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT); :: CONSUME_TOKENS(proc_urcu_reader, READ_LOCK_OUT, /* post-dominant */ @@ -662,7 +686,7 @@ non_atomic3_skip: smp_mb_reader(i, j); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB); - PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT, + PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT, READ_LOCK_NESTED_OUT); :: CONSUME_TOKENS(proc_urcu_reader, @@ -720,12 +744,12 @@ rmb1_end: /* reading urcu_active_readers, which have been written by * READ_UNLOCK_OUT : RAW */ PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE, - READ_UNLOCK_OUT /* RAW */ - | READ_PROC_SECOND_MB /* mb() orders reads */ - | READ_PROC_FIRST_MB /* mb() orders reads */ - | READ_LOCK_NESTED_OUT /* RAW */ + READ_PROC_SECOND_MB /* mb() orders reads */ + | READ_PROC_FIRST_MB, /* mb() orders reads */ + READ_LOCK_NESTED_OUT /* RAW */ | READ_LOCK_OUT /* RAW */ - | READ_UNLOCK_NESTED_OUT, /* RAW */ + | READ_UNLOCK_NESTED_OUT /* RAW */ + | READ_UNLOCK_OUT, /* RAW */ READ_LOCK_OUT_UNROLL); @@ -810,14 +834,14 @@ rmb2_end: goto end; rmb1: #ifndef NO_RMB - smp_rmb(i, j); + smp_rmb(i); #else ooo_mem(i); #endif goto rmb1_end; rmb2: #ifndef NO_RMB - smp_rmb(i, j); + smp_rmb(i); #else ooo_mem(i); #endif @@ -927,7 +951,7 @@ active proctype urcu_writer() assert(get_pid() < NR_PROCS); do - :: (loop_nr < 4) -> + :: (loop_nr < 3) -> #ifdef WRITER_PROGRESS progress_writer1: #endif @@ -968,7 +992,7 @@ progress_writer1: :: CONSUME_TOKENS(proc_urcu_writer, WRITE_DATA, WRITE_PROC_WMB) -> - smp_wmb(i, j); + smp_wmb(i); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB); :: CONSUME_TOKENS(proc_urcu_writer, @@ -1003,10 +1027,11 @@ smp_mb_send1_end: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP); :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ /* ONLY WAITING FOR READER 0 */ tmp2 = READ_CACHED_VAR(urcu_active_readers[0]); #ifndef SINGLE_FLIP @@ -1034,6 +1059,12 @@ smp_mb_send1_end: #ifndef GEN_ERROR_WRITER_PROGRESS goto smp_mb_send2; smp_mb_send2_end: + /* The memory barrier will invalidate the + * second read done as prefetching. Note that all + * instructions with side-effects depending on + * WRITE_PROC_SECOND_READ_GP should also depend on + * completion of this busy-waiting loop. */ + CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); #else ooo_mem(i); #endif @@ -1042,17 +1073,21 @@ smp_mb_send2_end: /* second flip */ :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_WAIT /* Control dependency : need to branch out of - * the loop to execute the next flip (CHECK) */ - | WRITE_PROC_FIRST_WRITE_GP + //WRITE_PROC_FIRST_WAIT | //test /* no dependency. Could pre-fetch, no side-effect. */ + WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_MB, WRITE_PROC_SECOND_READ_GP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ tmpa = READ_CACHED_VAR(urcu_gp_ctr); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_MB + WRITE_PROC_FIRST_WAIT /* dependency on first wait, because this + * instruction has globally observable + * side-effects. + */ + | WRITE_PROC_FIRST_MB | WRITE_PROC_WMB | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_WRITE_GP @@ -1063,11 +1098,12 @@ smp_mb_send2_end: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP); :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ /* ONLY WAITING FOR READER 0 */ tmp2 = READ_CACHED_VAR(urcu_active_readers[0]); if @@ -1079,7 +1115,7 @@ smp_mb_send2_end: fi; :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_SECOND_WRITE_GP | WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_SECOND_READ_GP @@ -1151,6 +1187,12 @@ end_writer: :: 1 -> #ifdef WRITER_PROGRESS progress_writer2: +#endif +#ifdef READER_PROGRESS + /* + * Make sure we don't block the reader's progress. + */ + smp_mb_send(i, j, 5); #endif skip; od;