* output exchanged. Therefore, i post-dominating j ensures that every path
* passing by j will pass by i before reaching the output.
*
+ * Prefetch and speculative execution
+ *
+ * If an instruction depends on the result of a previous branch, but it does not
+ * have side-effects, it can be executed before the branch result is known.
+ * However, it must be restarted if a core-synchronizing instruction is issued.
+ * Note that instructions which depend on the speculative instruction result
+ * but that have side-effects must depend on the branch completion in addition
+ * to the speculatively executed instruction.
+ *
* Other considerations
*
* Note about "volatile" keyword dependency : The compiler will order volatile
* Nested calls are not supported.
*/
+/*
+ * Only Alpha has out-of-order cache bank loads. Other architectures (Intel,
+ * PowerPC, ARM) ensure that dependent reads won't be reordered; cf.
+ * http://www.linuxjournal.com/article/8212
+ */
+#ifdef ARCH_ALPHA
+#define HAVE_OOO_CACHE_READ
+#endif
+
/*
 * Each process has its own data in cache. Caches are randomly updated.
* smp_wmb and smp_rmb forces cache updates (write and read), smp_mb forces
fi;
/* Must consume all prior read tokens. All subsequent reads depend on it. */
-inline smp_rmb(i, j)
+inline smp_rmb(i)
{
atomic {
CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
}
/* Must consume all prior write tokens. All subsequent writes depend on it. */
-inline smp_wmb(i, j)
+inline smp_wmb(i)
{
atomic {
CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
/* Synchronization point. Must consume all prior read and write tokens. All
* subsequent reads and writes depend on it. */
-inline smp_mb(i, j)
+inline smp_mb(i)
{
atomic {
- smp_wmb(i, j);
- smp_rmb(i, j);
+ smp_wmb(i);
+ smp_rmb(i);
}
}
{
do
:: (reader_barrier[get_readerid()] == 1) ->
- smp_mb(i, j);
+ /*
+ * We choose to ignore cycles caused by writer busy-looping,
+ * waiting for the reader, sending barrier requests, and the
+ * reader always services them without continuing execution.
+ */
+progress_ignoring_mb1:
+ smp_mb(i);
reader_barrier[get_readerid()] = 0;
:: 1 ->
- /* We choose to ignore writer's non-progress caused from the
- * reader ignoring the writer's mb() requests */
-#ifdef WRITER_PROGRESS
-progress_writer_from_reader:
-#endif
+ /*
+ * We choose to ignore writer's non-progress caused by the
+ * reader ignoring the writer's mb() requests.
+ */
+progress_ignoring_mb2:
break;
od;
}
-#ifdef WRITER_PROGRESS
-//#define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid:
-#define PROGRESS_LABEL(progressid)
-#else
-#define PROGRESS_LABEL(progressid)
-#endif
+#define PROGRESS_LABEL(progressid) progress_writer_progid_##progressid:
#define smp_mb_send(i, j, progressid) \
{ \
- smp_mb(i, j); \
+ smp_mb(i); \
i = 0; \
do \
:: i < NR_READERS -> \
* interest, given the reader has the ability to totally ignore \
* barrier requests. \
*/ \
-PROGRESS_LABEL(progressid) \
do \
- :: (reader_barrier[i] == 1) -> skip; \
+ :: (reader_barrier[i] == 1) -> \
+PROGRESS_LABEL(progressid) \
+ skip; \
:: (reader_barrier[i] == 0) -> break; \
od; \
i++; \
:: i >= NR_READERS -> \
break \
od; \
- smp_mb(i, j); \
+ smp_mb(i); \
}
#else
-#define smp_mb_send(i, j, progressid) smp_mb(i, j)
-#define smp_mb_reader smp_mb
+#define smp_mb_send(i, j, progressid) smp_mb(i)
+#define smp_mb_reader(i, j) smp_mb(i)
#define smp_mb_recv(i, j)
#endif
i++
:: i >= SLAB_SIZE -> break
od;
+#ifdef HAVE_OOO_CACHE_READ
RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
i = 0;
do
i++
:: i >= SLAB_SIZE -> break
od;
+#else
+ smp_rmb(i);
+#endif /* HAVE_OOO_CACHE_READ */
}
}
#define READ_PROD_B_IF_FALSE (1 << 2)
#define READ_PROD_C_IF_TRUE_READ (1 << 3)
-#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken) \
- :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) -> \
+#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken) \
+ :: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) -> \
ooo_mem(i); \
tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base); \
fi; \
/* IF TRUE */ \
- :: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base, \
+ :: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */ \
READ_PROD_C_IF_TRUE_READ << base) -> \
ooo_mem(i); \
tmp2 = READ_CACHED_VAR(urcu_gp_ctr); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base); \
:: CONSUME_TOKENS(proc_urcu_reader, \
- (READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \
+ (READ_PROD_B_IF_TRUE \
+ | READ_PROD_C_IF_TRUE_READ /* pre-dominant */ \
| READ_PROD_A_READ) << base, /* WAR */ \
producetoken) -> \
ooo_mem(i); \
consumetoken, \
READ_PROC_READ_UNLOCK << base) -> \
ooo_mem(i); \
- tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
+ tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]); \
PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base); \
:: CONSUME_TOKENS(proc_urcu_reader, \
consumetoken \
| (READ_PROC_READ_UNLOCK << base), /* WAR */ \
producetoken) -> \
ooo_mem(i); \
- WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1); \
+ WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1); \
PRODUCE_TOKENS(proc_urcu_reader, producetoken); \
skip
skip;
fi;
}
- :: 1 -> skip;
fi;
goto non_atomic3_skip;
atomic {
if
- PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT);
+ PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT);
:: CONSUME_TOKENS(proc_urcu_reader,
READ_LOCK_OUT, /* post-dominant */
smp_mb_reader(i, j);
PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
- PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT,
+ PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT,
READ_LOCK_NESTED_OUT);
:: CONSUME_TOKENS(proc_urcu_reader,
/* reading urcu_active_readers, which have been written by
* READ_UNLOCK_OUT : RAW */
PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE,
- READ_UNLOCK_OUT /* RAW */
- | READ_PROC_SECOND_MB /* mb() orders reads */
- | READ_PROC_FIRST_MB /* mb() orders reads */
- | READ_LOCK_NESTED_OUT /* RAW */
+ READ_PROC_SECOND_MB /* mb() orders reads */
+ | READ_PROC_FIRST_MB, /* mb() orders reads */
+ READ_LOCK_NESTED_OUT /* RAW */
| READ_LOCK_OUT /* RAW */
- | READ_UNLOCK_NESTED_OUT, /* RAW */
+ | READ_UNLOCK_NESTED_OUT /* RAW */
+ | READ_UNLOCK_OUT, /* RAW */
READ_LOCK_OUT_UNROLL);
goto end;
rmb1:
#ifndef NO_RMB
- smp_rmb(i, j);
+ smp_rmb(i);
#else
ooo_mem(i);
#endif
goto rmb1_end;
rmb2:
#ifndef NO_RMB
- smp_rmb(i, j);
+ smp_rmb(i);
#else
ooo_mem(i);
#endif
assert(get_pid() < NR_PROCS);
do
- :: (loop_nr < 4) ->
+ :: (loop_nr < 3) ->
#ifdef WRITER_PROGRESS
progress_writer1:
#endif
:: CONSUME_TOKENS(proc_urcu_writer,
WRITE_DATA,
WRITE_PROC_WMB) ->
- smp_wmb(i, j);
+ smp_wmb(i);
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);
:: CONSUME_TOKENS(proc_urcu_writer,
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);
:: CONSUME_TOKENS(proc_urcu_writer,
- //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
+ //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
ooo_mem(i);
+ //smp_mb(i); /* TEST */
/* ONLY WAITING FOR READER 0 */
tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
#ifndef SINGLE_FLIP
#ifndef GEN_ERROR_WRITER_PROGRESS
goto smp_mb_send2;
smp_mb_send2_end:
+ /* The memory barrier will invalidate the
+ * second read done as prefetching. Note that all
+ * instructions with side-effects depending on
+ * WRITE_PROC_SECOND_READ_GP should also depend on
+ * completion of this busy-waiting loop. */
+ CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
#else
ooo_mem(i);
#endif
/* second flip */
:: CONSUME_TOKENS(proc_urcu_writer,
- WRITE_PROC_FIRST_WAIT /* Control dependency : need to branch out of
- * the loop to execute the next flip (CHECK) */
- | WRITE_PROC_FIRST_WRITE_GP
+ //WRITE_PROC_FIRST_WAIT | //test /* no dependency. Could pre-fetch, no side-effect. */
+ WRITE_PROC_FIRST_WRITE_GP
| WRITE_PROC_FIRST_READ_GP
| WRITE_PROC_FIRST_MB,
WRITE_PROC_SECOND_READ_GP) ->
ooo_mem(i);
+ //smp_mb(i); /* TEST */
tmpa = READ_CACHED_VAR(urcu_gp_ctr);
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
:: CONSUME_TOKENS(proc_urcu_writer,
- WRITE_PROC_FIRST_MB
+ WRITE_PROC_FIRST_WAIT /* dependency on first wait, because this
+ * instruction has globally observable
+ * side-effects.
+ */
+ | WRITE_PROC_FIRST_MB
| WRITE_PROC_WMB
| WRITE_PROC_FIRST_READ_GP
| WRITE_PROC_FIRST_WRITE_GP
PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
:: CONSUME_TOKENS(proc_urcu_writer,
- //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
+ //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
WRITE_PROC_FIRST_WAIT
| WRITE_PROC_FIRST_MB, /* can be reordered before/after flips */
WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
ooo_mem(i);
+ //smp_mb(i); /* TEST */
/* ONLY WAITING FOR READER 0 */
tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
if
fi;
:: CONSUME_TOKENS(proc_urcu_writer,
- //WRITE_PROC_FIRST_WRITE_GP /* TEST ADDING SYNC CORE */
+ //WRITE_PROC_FIRST_WRITE_GP | /* TEST ADDING SYNC CORE */
WRITE_PROC_SECOND_WRITE_GP
| WRITE_PROC_FIRST_WRITE_GP
| WRITE_PROC_SECOND_READ_GP
:: 1 ->
#ifdef WRITER_PROGRESS
progress_writer2:
+#endif
+#ifdef READER_PROGRESS
+ /*
+ * Make sure we don't block the reader's progress.
+ */
+ smp_mb_send(i, j, 5);
#endif
skip;
od;