X-Git-Url: https://git.liburcu.org/?a=blobdiff_plain;f=formal-model%2Furcu-controldataflow%2Furcu.spin;h=8075506ec66c4cb21dca6dbb9efa8a5e8a8c9f9e;hb=41e967af0a4bd23a88b87be39a6c7f7d68a9e2ca;hp=6ec1655654367c75e78ca547dcbaf07fe4af73ca;hpb=f089ec2496f9d72eb737de6ac4786b81f5d55a7e;p=urcu.git diff --git a/formal-model/urcu-controldataflow/urcu.spin b/formal-model/urcu-controldataflow/urcu.spin index 6ec1655..8075506 100644 --- a/formal-model/urcu-controldataflow/urcu.spin +++ b/formal-model/urcu-controldataflow/urcu.spin @@ -98,6 +98,15 @@ * output exchanged. Therefore, i post-dominating j ensures that every path * passing by j will pass by i before reaching the output. * + * Prefetch and speculative execution + * + * If an instruction depends on the result of a previous branch, but it does not + * have side-effects, it can be executed before the branch result is known. + * however, it must be restarted if a core-synchronizing instruction is issued. + * Note that instructions which depend on the speculative instruction result + * but that have side-effects must depend on the branch completion in addition + * to the speculatively executed instruction. + * * Other considerations * * Note about "volatile" keyword dependency : The compiler will order volatile @@ -126,6 +135,15 @@ * Nested calls are not supported. */ +/* + * Only Alpha has out-of-order cache bank loads. Other architectures (intel, + * powerpc, arm) ensure that dependent reads won't be reordered. c.f. + * http://www.linuxjournal.com/article/8212) + */ +#ifdef ARCH_ALPHA +#define HAVE_OOO_CACHE_READ +#endif + /* * Each process have its own data in cache. Caches are randomly updated. * smp_wmb and smp_rmb forces cache updates (write and read), smp_mb forces @@ -136,44 +154,44 @@ typedef per_proc_byte { byte val[NR_PROCS]; }; -/* Bitfield has a maximum of 8 procs */ typedef per_proc_bit { + bit val[NR_PROCS]; +}; + +/* Bitfield has a maximum of 8 procs */ +typedef per_proc_bitfield { byte bitfield; }; #define DECLARE_CACHED_VAR(type, x) \ - type mem_##x; \ - per_proc_##type cached_##x; \ - per_proc_bit cache_dirty_##x; - -#define INIT_CACHED_VAR(x, v, j) \ - mem_##x = v; \ - cache_dirty_##x.bitfield = 0; \ - j = 0; \ - do \ - :: j < NR_PROCS -> \ - cached_##x.val[j] = v; \ - j++ \ - :: j >= NR_PROCS -> break \ - od; + type mem_##x; + +#define DECLARE_PROC_CACHED_VAR(type, x)\ + type cached_##x; \ + bit cache_dirty_##x; + +#define INIT_CACHED_VAR(x, v) \ + mem_##x = v; + +#define INIT_PROC_CACHED_VAR(x, v) \ + cache_dirty_##x = 0; \ + cached_##x = v; -#define IS_CACHE_DIRTY(x, id) (cache_dirty_##x.bitfield & (1 << id)) +#define IS_CACHE_DIRTY(x, id) (cache_dirty_##x) -#define READ_CACHED_VAR(x) (cached_##x.val[get_pid()]) +#define READ_CACHED_VAR(x) (cached_##x) #define WRITE_CACHED_VAR(x, v) \ atomic { \ - cached_##x.val[get_pid()] = v; \ - cache_dirty_##x.bitfield = \ - cache_dirty_##x.bitfield | (1 << get_pid()); \ + cached_##x = v; \ + cache_dirty_##x = 1; \ } #define CACHE_WRITE_TO_MEM(x, id) \ if \ :: IS_CACHE_DIRTY(x, id) -> \ - mem_##x = cached_##x.val[id]; \ - cache_dirty_##x.bitfield = \ - cache_dirty_##x.bitfield & (~(1 << id)); \ + mem_##x = cached_##x; \ + cache_dirty_##x = 0; \ :: else -> \ skip \ fi; @@ -181,7 +199,7 @@ typedef per_proc_bit { #define CACHE_READ_FROM_MEM(x, id) \ if \ :: !IS_CACHE_DIRTY(x, id) -> \ - cached_##x.val[id] = mem_##x;\ + cached_##x = mem_##x; \ :: else -> \ skip \ fi; @@ -202,7 +220,7 @@ typedef per_proc_bit { fi; /* Must consume all prior read tokens. All subsequent reads depend on it. */ -inline smp_rmb(i, j) +inline smp_rmb(i) { atomic { CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid()); @@ -213,12 +231,19 @@ inline smp_rmb(i, j) i++ :: i >= NR_READERS -> break od; - CACHE_READ_FROM_MEM(generation_ptr, get_pid()); + CACHE_READ_FROM_MEM(rcu_ptr, get_pid()); + i = 0; + do + :: i < SLAB_SIZE -> + CACHE_READ_FROM_MEM(rcu_data[i], get_pid()); + i++ + :: i >= SLAB_SIZE -> break + od; } } /* Must consume all prior write tokens. All subsequent writes depend on it. */ -inline smp_wmb(i, j) +inline smp_wmb(i) { atomic { CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid()); @@ -229,21 +254,27 @@ inline smp_wmb(i, j) i++ :: i >= NR_READERS -> break od; - CACHE_WRITE_TO_MEM(generation_ptr, get_pid()); + CACHE_WRITE_TO_MEM(rcu_ptr, get_pid()); + i = 0; + do + :: i < SLAB_SIZE -> + CACHE_WRITE_TO_MEM(rcu_data[i], get_pid()); + i++ + :: i >= SLAB_SIZE -> break + od; } } /* Synchronization point. Must consume all prior read and write tokens. All * subsequent reads and writes depend on it. */ -inline smp_mb(i, j) +inline smp_mb(i) { atomic { - smp_wmb(i, j); - smp_rmb(i, j); + smp_wmb(i); + smp_rmb(i); } } - #ifdef REMOTE_BARRIERS bit reader_barrier[NR_READERS]; @@ -267,57 +298,82 @@ inline smp_mb_recv(i, j) { do :: (reader_barrier[get_readerid()] == 1) -> - smp_mb(i, j); + /* + * We choose to ignore cycles caused by writer busy-looping, + * waiting for the reader, sending barrier requests, and the + * reader always services them without continuing execution. + */ +progress_ignoring_mb1: + smp_mb(i); reader_barrier[get_readerid()] = 0; - :: 1 -> skip; - :: 1 -> break; + :: 1 -> + /* + * We choose to ignore writer's non-progress caused by the + * reader ignoring the writer's mb() requests. + */ +progress_ignoring_mb2: + break; od; } -inline smp_mb_send(i, j) -{ - smp_mb(i, j); - i = 0; - do - :: i < NR_READERS -> - reader_barrier[i] = 1; - do - :: (reader_barrier[i] == 1) -> skip; - :: (reader_barrier[i] == 0) -> break; - od; - i++; - :: i >= NR_READERS -> - break - od; - smp_mb(i, j); +#define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid: + +#define smp_mb_send(i, j, progressid) \ +{ \ + smp_mb(i); \ + i = 0; \ + do \ + :: i < NR_READERS -> \ + reader_barrier[i] = 1; \ + /* \ + * Busy-looping waiting for reader barrier handling is of little\ + * interest, given the reader has the ability to totally ignore \ + * barrier requests. \ + */ \ + do \ + :: (reader_barrier[i] == 1) -> \ +PROGRESS_LABEL(progressid) \ + skip; \ + :: (reader_barrier[i] == 0) -> break; \ + od; \ + i++; \ + :: i >= NR_READERS -> \ + break \ + od; \ + smp_mb(i); \ } #else -#define smp_mb_send smp_mb -#define smp_mb_reader smp_mb +#define smp_mb_send(i, j, progressid) smp_mb(i) +#define smp_mb_reader(i, j) smp_mb(i) #define smp_mb_recv(i, j) #endif -/* Keep in sync manually with smp_rmb, wmp_wmb, ooo_mem and init() */ +/* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */ DECLARE_CACHED_VAR(byte, urcu_gp_ctr); -/* Note ! currently only two readers */ +/* Note ! currently only one reader */ DECLARE_CACHED_VAR(byte, urcu_active_readers[NR_READERS]); -/* pointer generation */ -DECLARE_CACHED_VAR(byte, generation_ptr); - -byte last_free_gen = 0; -bit free_done = 0; -byte read_generation[NR_READERS]; -bit data_access[NR_READERS]; +/* RCU data */ +DECLARE_CACHED_VAR(bit, rcu_data[SLAB_SIZE]); + +/* RCU pointer */ +#if (SLAB_SIZE == 2) +DECLARE_CACHED_VAR(bit, rcu_ptr); +bit ptr_read_first[NR_READERS]; +bit ptr_read_second[NR_READERS]; +#else +DECLARE_CACHED_VAR(byte, rcu_ptr); +byte ptr_read_first[NR_READERS]; +byte ptr_read_second[NR_READERS]; +#endif -bit write_lock = 0; +bit data_read_first[NR_READERS]; +bit data_read_second[NR_READERS]; bit init_done = 0; -bit sighand_exec = 0; - inline wait_init_done() { do @@ -338,7 +394,15 @@ inline ooo_mem(i) i++ :: i >= NR_READERS -> break od; - RANDOM_CACHE_WRITE_TO_MEM(generation_ptr, get_pid()); + RANDOM_CACHE_WRITE_TO_MEM(rcu_ptr, get_pid()); + i = 0; + do + :: i < SLAB_SIZE -> + RANDOM_CACHE_WRITE_TO_MEM(rcu_data[i], get_pid()); + i++ + :: i >= SLAB_SIZE -> break + od; +#ifdef HAVE_OOO_CACHE_READ RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid()); i = 0; do @@ -348,7 +412,17 @@ inline ooo_mem(i) i++ :: i >= NR_READERS -> break od; - RANDOM_CACHE_READ_FROM_MEM(generation_ptr, get_pid()); + RANDOM_CACHE_READ_FROM_MEM(rcu_ptr, get_pid()); + i = 0; + do + :: i < SLAB_SIZE -> + RANDOM_CACHE_READ_FROM_MEM(rcu_data[i], get_pid()); + i++ + :: i >= SLAB_SIZE -> break + od; +#else + smp_rmb(i); +#endif /* HAVE_OOO_CACHE_READ */ } } @@ -365,8 +439,8 @@ int _proc_urcu_reader; #define READ_PROD_B_IF_FALSE (1 << 2) #define READ_PROD_C_IF_TRUE_READ (1 << 3) -#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken) \ - :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) -> \ +#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \ + :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) -> \ ooo_mem(i); \ tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \ @@ -380,13 +454,14 @@ int _proc_urcu_reader; PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \ fi; \ /* IF TRUE */ \ - :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base, \ + :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */ \ READ_PROD_C_IF_TRUE_READ << base) -> \ ooo_mem(i); \ tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \ :: CONSUME_TOKENS(proc_urcu_reader, \ - (READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \ + (READ_PROD_B_IF_TRUE \ + | READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \ | READ_PROD_A_READ) << base, /* WAR */ \ producetoken) -> \ ooo_mem(i); \ @@ -416,14 +491,14 @@ int _proc_urcu_reader; consumetoken, \ READ_PROC_READ_UNLOCK << base) -> \ ooo_mem(i); \ - tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ + tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \ PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \ :: CONSUME_TOKENS(proc_urcu_reader, \ consumetoken \ | (READ_PROC_READ_UNLOCK << base), /* WAR */ \ producetoken) -> \ ooo_mem(i); \ - WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1); \ + WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \ PRODUCE_TOKENS(proc_urcu_reader, producetoken); \ skip @@ -587,7 +662,6 @@ non_atomic3_end: skip; fi; } - :: 1 -> skip; fi; goto non_atomic3_skip; @@ -600,7 +674,7 @@ non_atomic3_skip: atomic { if - PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT); + PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT); :: CONSUME_TOKENS(proc_urcu_reader, READ_LOCK_OUT, /* post-dominant */ @@ -608,24 +682,25 @@ non_atomic3_skip: smp_mb_reader(i, j); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB); - PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT, + PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT, READ_LOCK_NESTED_OUT); :: CONSUME_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB, /* mb() orders reads */ READ_PROC_READ_GEN) -> ooo_mem(i); - read_generation[get_readerid()] = - READ_CACHED_VAR(generation_ptr); + ptr_read_first[get_readerid()] = READ_CACHED_VAR(rcu_ptr); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN); :: CONSUME_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB /* mb() orders reads */ | READ_PROC_READ_GEN, READ_PROC_ACCESS_GEN) -> - ooo_mem(i); - goto non_atomic; -non_atomic_end: + /* smp_read_barrier_depends */ + goto rmb1; +rmb1_end: + data_read_first[get_readerid()] = + READ_CACHED_VAR(rcu_data[ptr_read_first[get_readerid()]]); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN); @@ -665,12 +740,12 @@ non_atomic_end: /* reading urcu_active_readers, which have been written by * READ_UNLOCK_OUT : RAW */ PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE, - READ_UNLOCK_OUT /* RAW */ - | READ_PROC_SECOND_MB /* mb() orders reads */ - | READ_PROC_FIRST_MB /* mb() orders reads */ - | READ_LOCK_NESTED_OUT /* RAW */ + READ_PROC_SECOND_MB /* mb() orders reads */ + | READ_PROC_FIRST_MB, /* mb() orders reads */ + READ_LOCK_NESTED_OUT /* RAW */ | READ_LOCK_OUT /* RAW */ - | READ_UNLOCK_NESTED_OUT, /* RAW */ + | READ_UNLOCK_NESTED_OUT /* RAW */ + | READ_UNLOCK_OUT, /* RAW */ READ_LOCK_OUT_UNROLL); @@ -692,8 +767,7 @@ non_atomic_end: | READ_PROC_THIRD_MB, /* mb() orders reads */ READ_PROC_READ_GEN_UNROLL) -> ooo_mem(i); - read_generation[get_readerid()] = - READ_CACHED_VAR(generation_ptr); + ptr_read_second[get_readerid()] = READ_CACHED_VAR(rcu_ptr); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_GEN_UNROLL); :: CONSUME_TOKENS(proc_urcu_reader, @@ -702,9 +776,11 @@ non_atomic_end: | READ_PROC_SECOND_MB /* mb() orders reads */ | READ_PROC_THIRD_MB, /* mb() orders reads */ READ_PROC_ACCESS_GEN_UNROLL) -> - ooo_mem(i); - goto non_atomic2; -non_atomic2_end: + /* smp_read_barrier_depends */ + goto rmb2; +rmb2_end: + data_read_second[get_readerid()] = + READ_CACHED_VAR(rcu_data[ptr_read_second[get_readerid()]]); PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_ACCESS_GEN_UNROLL); :: CONSUME_TOKENS(proc_urcu_reader, @@ -752,14 +828,20 @@ non_atomic2_end: * to spill its execution on other loop's execution. */ goto end; -non_atomic: - data_access[get_readerid()] = 1; - data_access[get_readerid()] = 0; - goto non_atomic_end; -non_atomic2: - data_access[get_readerid()] = 1; - data_access[get_readerid()] = 0; - goto non_atomic2_end; +rmb1: +#ifndef NO_RMB + smp_rmb(i); +#else + ooo_mem(i); +#endif + goto rmb1_end; +rmb2: +#ifndef NO_RMB + smp_rmb(i); +#else + ooo_mem(i); +#endif + goto rmb2_end; end: skip; } @@ -771,6 +853,41 @@ active proctype urcu_reader() byte i, j, nest_i; byte tmp, tmp2; + /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */ + DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr); + /* Note ! currently only one reader */ + DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]); + /* RCU data */ + DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]); + + /* RCU pointer */ +#if (SLAB_SIZE == 2) + DECLARE_PROC_CACHED_VAR(bit, rcu_ptr); +#else + DECLARE_PROC_CACHED_VAR(byte, rcu_ptr); +#endif + + atomic { + INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1); + INIT_PROC_CACHED_VAR(rcu_ptr, 0); + + i = 0; + do + :: i < NR_READERS -> + INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0); + i++; + :: i >= NR_READERS -> break + od; + INIT_PROC_CACHED_VAR(rcu_data[0], WINE); + i = 1; + do + :: i < SLAB_SIZE -> + INIT_PROC_CACHED_VAR(rcu_data[i], POISON); + i++ + :: i >= SLAB_SIZE -> break + od; + } + wait_init_done(); assert(get_pid() < NR_PROCS); @@ -808,23 +925,32 @@ int _proc_urcu_writer; #define WRITE_PROD_NONE (1 << 0) -#define WRITE_PROC_FIRST_MB (1 << 1) +#define WRITE_DATA (1 << 1) +#define WRITE_PROC_WMB (1 << 2) +#define WRITE_XCHG_PTR (1 << 3) + +#define WRITE_PROC_FIRST_MB (1 << 4) /* first flip */ -#define WRITE_PROC_FIRST_READ_GP (1 << 2) -#define WRITE_PROC_FIRST_WRITE_GP (1 << 3) -#define WRITE_PROC_FIRST_WAIT (1 << 4) -#define WRITE_PROC_FIRST_WAIT_LOOP (1 << 5) +#define WRITE_PROC_FIRST_READ_GP (1 << 5) +#define WRITE_PROC_FIRST_WRITE_GP (1 << 6) +#define WRITE_PROC_FIRST_WAIT (1 << 7) +#define WRITE_PROC_FIRST_WAIT_LOOP (1 << 8) /* second flip */ -#define WRITE_PROC_SECOND_READ_GP (1 << 6) -#define WRITE_PROC_SECOND_WRITE_GP (1 << 7) -#define WRITE_PROC_SECOND_WAIT (1 << 8) -#define WRITE_PROC_SECOND_WAIT_LOOP (1 << 9) +#define WRITE_PROC_SECOND_READ_GP (1 << 9) +#define WRITE_PROC_SECOND_WRITE_GP (1 << 10) +#define WRITE_PROC_SECOND_WAIT (1 << 11) +#define WRITE_PROC_SECOND_WAIT_LOOP (1 << 12) -#define WRITE_PROC_SECOND_MB (1 << 10) +#define WRITE_PROC_SECOND_MB (1 << 13) + +#define WRITE_FREE (1 << 14) #define WRITE_PROC_ALL_TOKENS (WRITE_PROD_NONE \ + | WRITE_DATA \ + | WRITE_PROC_WMB \ + | WRITE_XCHG_PTR \ | WRITE_PROC_FIRST_MB \ | WRITE_PROC_FIRST_READ_GP \ | WRITE_PROC_FIRST_WRITE_GP \ @@ -832,52 +958,78 @@ int _proc_urcu_writer; | WRITE_PROC_SECOND_READ_GP \ | WRITE_PROC_SECOND_WRITE_GP \ | WRITE_PROC_SECOND_WAIT \ - | WRITE_PROC_SECOND_MB) + | WRITE_PROC_SECOND_MB \ + | WRITE_FREE) -#define WRITE_PROC_ALL_TOKENS_CLEAR ((1 << 11) - 1) +#define WRITE_PROC_ALL_TOKENS_CLEAR ((1 << 15) - 1) +/* + * Mutexes are implied around writer execution. A single writer at a time. + */ active proctype urcu_writer() { byte i, j; byte tmp, tmp2, tmpa; - byte old_gen; + byte cur_data = 0, old_data, loop_nr = 0; byte cur_gp_val = 0; /* * Keep a local trace of the current parity so * we don't add non-existing dependencies on the global * GP update. Needed to test single flip case. */ + /* Keep in sync manually with smp_rmb, smp_wmb, ooo_mem and init() */ + DECLARE_PROC_CACHED_VAR(byte, urcu_gp_ctr); + /* Note ! currently only one reader */ + DECLARE_PROC_CACHED_VAR(byte, urcu_active_readers[NR_READERS]); + /* RCU data */ + DECLARE_PROC_CACHED_VAR(bit, rcu_data[SLAB_SIZE]); + + /* RCU pointer */ +#if (SLAB_SIZE == 2) + DECLARE_PROC_CACHED_VAR(bit, rcu_ptr); +#else + DECLARE_PROC_CACHED_VAR(byte, rcu_ptr); +#endif + + atomic { + INIT_PROC_CACHED_VAR(urcu_gp_ctr, 1); + INIT_PROC_CACHED_VAR(rcu_ptr, 0); + + i = 0; + do + :: i < NR_READERS -> + INIT_PROC_CACHED_VAR(urcu_active_readers[i], 0); + i++; + :: i >= NR_READERS -> break + od; + INIT_PROC_CACHED_VAR(rcu_data[0], WINE); + i = 1; + do + :: i < SLAB_SIZE -> + INIT_PROC_CACHED_VAR(rcu_data[i], POISON); + i++ + :: i >= SLAB_SIZE -> break + od; + } + + wait_init_done(); assert(get_pid() < NR_PROCS); do - :: (READ_CACHED_VAR(generation_ptr) < 5) -> + :: (loop_nr < 3) -> #ifdef WRITER_PROGRESS progress_writer1: #endif - ooo_mem(i); - atomic { - old_gen = READ_CACHED_VAR(generation_ptr); - WRITE_CACHED_VAR(generation_ptr, old_gen + 1); - } - ooo_mem(i); - - do - :: 1 -> - atomic { - if - :: write_lock == 0 -> - write_lock = 1; - break; - :: else -> - skip; - fi; - } - od; + loop_nr = loop_nr + 1; PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROD_NONE); +#ifdef NO_WMB + PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB); +#endif + #ifdef NO_MB PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_MB); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB); @@ -887,13 +1039,41 @@ progress_writer1: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT); + /* For single flip, we need to know the current parity */ + cur_gp_val = cur_gp_val ^ RCU_GP_CTR_BIT; #endif do :: 1 -> atomic { if + :: CONSUME_TOKENS(proc_urcu_writer, WRITE_PROD_NONE, + WRITE_DATA) -> + ooo_mem(i); + cur_data = (cur_data + 1) % SLAB_SIZE; + WRITE_CACHED_VAR(rcu_data[cur_data], WINE); + PRODUCE_TOKENS(proc_urcu_writer, WRITE_DATA); + + + :: CONSUME_TOKENS(proc_urcu_writer, + WRITE_DATA, + WRITE_PROC_WMB) -> + smp_wmb(i); + PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB); + + :: CONSUME_TOKENS(proc_urcu_writer, + WRITE_PROC_WMB, + WRITE_XCHG_PTR) -> + /* rcu_xchg_pointer() */ + atomic { + old_data = READ_CACHED_VAR(rcu_ptr); + WRITE_CACHED_VAR(rcu_ptr, cur_data); + } + PRODUCE_TOKENS(proc_urcu_writer, WRITE_XCHG_PTR); + + :: CONSUME_TOKENS(proc_urcu_writer, + WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR, WRITE_PROC_FIRST_MB) -> goto smp_mb_send1; smp_mb_send1_end: @@ -906,20 +1086,27 @@ smp_mb_send1_end: tmpa = READ_CACHED_VAR(urcu_gp_ctr); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_READ_GP); :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_MB | WRITE_PROC_FIRST_READ_GP, + WRITE_PROC_FIRST_MB | WRITE_PROC_WMB + | WRITE_PROC_FIRST_READ_GP, WRITE_PROC_FIRST_WRITE_GP) -> ooo_mem(i); WRITE_CACHED_VAR(urcu_gp_ctr, tmpa ^ RCU_GP_CTR_BIT); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP); :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ /* ONLY WAITING FOR READER 0 */ tmp2 = READ_CACHED_VAR(urcu_active_readers[0]); - cur_gp_val = cur_gp_val ^ RCU_GP_CTR_BIT; +#ifndef SINGLE_FLIP + /* In normal execution, we are always starting by + * waiting for the even parity. + */ + cur_gp_val = RCU_GP_CTR_BIT; +#endif if :: (tmp2 & RCU_GP_CTR_NEST_MASK) && ((tmp2 ^ cur_gp_val) & RCU_GP_CTR_BIT) -> @@ -933,11 +1120,18 @@ smp_mb_send1_end: WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_WAIT_LOOP + | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ 0) -> #ifndef GEN_ERROR_WRITER_PROGRESS goto smp_mb_send2; smp_mb_send2_end: + /* The memory barrier will invalidate the + * second read done as prefetching. Note that all + * instructions with side-effects depending on + * WRITE_PROC_SECOND_READ_GP should also depend on + * completion of this busy-waiting loop. */ + CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); #else ooo_mem(i); #endif @@ -946,17 +1140,22 @@ smp_mb_send2_end: /* second flip */ :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_WAIT /* Control dependency : need to branch out of - * the loop to execute the next flip (CHECK) */ - | WRITE_PROC_FIRST_WRITE_GP + //WRITE_PROC_FIRST_WAIT | //test /* no dependency. Could pre-fetch, no side-effect. */ + WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_MB, WRITE_PROC_SECOND_READ_GP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ tmpa = READ_CACHED_VAR(urcu_gp_ctr); PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP); :: CONSUME_TOKENS(proc_urcu_writer, - WRITE_PROC_FIRST_MB + WRITE_PROC_FIRST_WAIT /* dependency on first wait, because this + * instruction has globally observable + * side-effects. + */ + | WRITE_PROC_FIRST_MB + | WRITE_PROC_WMB | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_SECOND_READ_GP, @@ -966,29 +1165,30 @@ smp_mb_send2_end: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP); :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) -> ooo_mem(i); + //smp_mb(i); /* TEST */ /* ONLY WAITING FOR READER 0 */ tmp2 = READ_CACHED_VAR(urcu_active_readers[0]); - cur_gp_val = cur_gp_val ^ RCU_GP_CTR_BIT; if :: (tmp2 & RCU_GP_CTR_NEST_MASK) - && ((tmp2 ^ cur_gp_val) & RCU_GP_CTR_BIT) -> + && ((tmp2 ^ 0) & RCU_GP_CTR_BIT) -> PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT_LOOP); :: else -> PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WAIT); fi; :: CONSUME_TOKENS(proc_urcu_writer, - //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */ + //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */ WRITE_PROC_SECOND_WRITE_GP | WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_SECOND_READ_GP | WRITE_PROC_FIRST_READ_GP | WRITE_PROC_SECOND_WAIT_LOOP + | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR | WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */ 0) -> #ifndef GEN_ERROR_WRITER_PROGRESS @@ -1008,25 +1208,40 @@ smp_mb_send3_end: | WRITE_PROC_SECOND_READ_GP | WRITE_PROC_FIRST_WRITE_GP | WRITE_PROC_SECOND_WRITE_GP + | WRITE_DATA | WRITE_PROC_WMB | WRITE_XCHG_PTR | WRITE_PROC_FIRST_MB, WRITE_PROC_SECOND_MB) -> goto smp_mb_send4; smp_mb_send4_end: PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_MB); + :: CONSUME_TOKENS(proc_urcu_writer, + WRITE_XCHG_PTR + | WRITE_PROC_FIRST_WAIT + | WRITE_PROC_SECOND_WAIT + | WRITE_PROC_WMB /* No dependency on + * WRITE_DATA because we + * write to a + * different location. */ + | WRITE_PROC_SECOND_MB + | WRITE_PROC_FIRST_MB, + WRITE_FREE) -> + WRITE_CACHED_VAR(rcu_data[old_data], POISON); + PRODUCE_TOKENS(proc_urcu_writer, WRITE_FREE); + :: CONSUME_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS, 0) -> CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_ALL_TOKENS_CLEAR); break; fi; } od; - - write_lock = 0; - /* free-up step, e.g., kfree(). */ - atomic { - last_free_gen = old_gen; - free_done = 1; - } + /* + * Note : Promela model adds implicit serialization of the + * WRITE_FREE instruction. Normally, it would be permitted to + * spill on the next loop execution. Given the validation we do + * checks for the data entry read to be poisoned, it's ok if + * we do not check "late arriving" memory poisoning. + */ :: else -> break; od; /* @@ -1039,6 +1254,12 @@ end_writer: :: 1 -> #ifdef WRITER_PROGRESS progress_writer2: +#endif +#ifdef READER_PROGRESS + /* + * Make sure we don't block the reader's progress. + */ + smp_mb_send(i, j, 5); #endif skip; od; @@ -1046,18 +1267,18 @@ progress_writer2: /* Non-atomic parts of the loop */ goto end; smp_mb_send1: - smp_mb_send(i, j); + smp_mb_send(i, j, 1); goto smp_mb_send1_end; #ifndef GEN_ERROR_WRITER_PROGRESS smp_mb_send2: - smp_mb_send(i, j); + smp_mb_send(i, j, 2); goto smp_mb_send2_end; smp_mb_send3: - smp_mb_send(i, j); + smp_mb_send(i, j, 3); goto smp_mb_send3_end; #endif smp_mb_send4: - smp_mb_send(i, j); + smp_mb_send(i, j, 4); goto smp_mb_send4_end; end: skip; @@ -1072,18 +1293,29 @@ init { byte i, j; atomic { - INIT_CACHED_VAR(urcu_gp_ctr, 1, j); - INIT_CACHED_VAR(generation_ptr, 0, j); + INIT_CACHED_VAR(urcu_gp_ctr, 1); + INIT_CACHED_VAR(rcu_ptr, 0); i = 0; do :: i < NR_READERS -> - INIT_CACHED_VAR(urcu_active_readers[i], 0, j); - read_generation[i] = 1; - data_access[i] = 0; + INIT_CACHED_VAR(urcu_active_readers[i], 0); + ptr_read_first[i] = 1; + ptr_read_second[i] = 1; + data_read_first[i] = WINE; + data_read_second[i] = WINE; i++; :: i >= NR_READERS -> break od; + INIT_CACHED_VAR(rcu_data[0], WINE); + i = 1; + do + :: i < SLAB_SIZE -> + INIT_CACHED_VAR(rcu_data[i], POISON); + i++ + :: i >= SLAB_SIZE -> break + od; + init_done = 1; } }