X-Git-Url: http://git.liburcu.org/?a=blobdiff_plain;f=formal-model%2Furcu-controldataflow%2Furcu.spin;h=54752a17708dbec58c2872ca663c0afe7efe4c0b;hb=ba59a0c7b244a0939a2298fc76a9002436ef9674;hp=12f841ce46ed7225be2c8fe83b63d938483a00e6;hpb=caeea74ce58faba3ef2bb7d2bd925d9009803086;p=urcu.git

diff --git a/formal-model/urcu-controldataflow/urcu.spin b/formal-model/urcu-controldataflow/urcu.spin
index 12f841c..54752a1 100644
--- a/formal-model/urcu-controldataflow/urcu.spin
+++ b/formal-model/urcu-controldataflow/urcu.spin
@@ -98,6 +98,15 @@
  * output exchanged. Therefore, i post-dominating j ensures that every path
  * passing by j will pass by i before reaching the output.
  *
+ * Prefetch and speculative execution
+ *
+ * If an instruction depends on the result of a previous branch, but it does not
+ * have side-effects, it can be executed before the branch result is known.
+ * however, it must be restarted if a core-synchronizing instruction is issued.
+ * Note that instructions which depend on the speculative instruction result
+ * but that have side-effects must depend on the branch completion in addition
+ * to the speculatively executed instruction.
+ *
  * Other considerations
  *
  * Note about "volatile" keyword dependency : The compiler will order volatile
@@ -126,6 +135,15 @@
  * Nested calls are not supported.
  */
 
+/*
+ * Only Alpha has out-of-order cache bank loads. Other architectures (intel,
+ * powerpc, arm) ensure that dependent reads won't be reordered. c.f.
+ * http://www.linuxjournal.com/article/8212)
+ */
+#ifdef ARCH_ALPHA
+#define HAVE_OOO_CACHE_READ
+#endif
+
 /*
  * Each process have its own data in cache. Caches are randomly updated.
  * smp_wmb and smp_rmb forces cache updates (write and read), smp_mb forces
@@ -206,7 +224,7 @@ typedef per_proc_bitfield {
 	fi;
 
 /* Must consume all prior read tokens. All subsequent reads depend on it. */
-inline smp_rmb(i, j)
+inline smp_rmb(i)
 {
 	atomic {
 		CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
@@ -229,7 +247,7 @@ inline smp_rmb(i, j)
 }
 
 /* Must consume all prior write tokens. All subsequent writes depend on it. */
-inline smp_wmb(i, j)
+inline smp_wmb(i)
 {
 	atomic {
 		CACHE_WRITE_TO_MEM(urcu_gp_ctr, get_pid());
@@ -253,11 +271,11 @@ inline smp_wmb(i, j)
 
 /* Synchronization point. Must consume all prior read and write tokens. All
  * subsequent reads and writes depend on it. */
-inline smp_mb(i, j)
+inline smp_mb(i)
 {
 	atomic {
-		smp_wmb(i, j);
-		smp_rmb(i, j);
+		smp_wmb(i);
+		smp_rmb(i);
 	}
 }
 
@@ -284,28 +302,29 @@ inline smp_mb_recv(i, j)
 {
 	do
 	:: (reader_barrier[get_readerid()] == 1) ->
-		smp_mb(i, j);
+		/*
+		 * We choose to ignore cycles caused by writer busy-looping,
+		 * waiting for the reader, sending barrier requests, and the
+		 * reader always services them without continuing execution.
+		 */
+progress_ignoring_mb1:
+		smp_mb(i);
 		reader_barrier[get_readerid()] = 0;
 	:: 1 ->
-		/* We choose to ignore writer's non-progress caused from the
-		 * reader ignoring the writer's mb() requests */
-#ifdef WRITER_PROGRESS
-progress_writer_from_reader:
-#endif
+		/*
+		 * We choose to ignore writer's non-progress caused by the
+		 * reader ignoring the writer's mb() requests.
+		 */
+progress_ignoring_mb2:
 		break;
 	od;
 }
 
-#ifdef WRITER_PROGRESS
-//#define PROGRESS_LABEL(progressid)	progress_writer_progid_##progressid:
-#define PROGRESS_LABEL(progressid)
-#else
-#define PROGRESS_LABEL(progressid)
-#endif
+#define PROGRESS_LABEL(progressid)	progress_writer_progid_##progressid:
 
 #define smp_mb_send(i, j, progressid)						\
 {										\
-	smp_mb(i, j);								\
+	smp_mb(i);								\
 	i = 0;									\
 	do									\
 	:: i < NR_READERS ->							\
@@ -315,22 +334,23 @@ progress_writer_from_reader:
 		 * interest, given the reader has the ability to totally ignore	\
 		 * barrier requests.						\
 		 */								\
-PROGRESS_LABEL(progressid)							\
 		do								\
-		:: (reader_barrier[i] == 1) -> skip;				\
+		:: (reader_barrier[i] == 1) ->					\
+PROGRESS_LABEL(progressid)							\
+			skip;							\
 		:: (reader_barrier[i] == 0) -> break;				\
 		od;								\
 		i++;								\
 	:: i >= NR_READERS ->							\
 		break								\
 	od;									\
-	smp_mb(i, j);								\
+	smp_mb(i);								\
 }
 
 #else
 
-#define smp_mb_send(i, j, progressid)	smp_mb(i, j)
-#define smp_mb_reader	smp_mb
+#define smp_mb_send(i, j, progressid)	smp_mb(i)
+#define smp_mb_reader(i, j)		smp_mb(i)
 #define smp_mb_recv(i, j)
 
 #endif
@@ -386,6 +406,7 @@ inline ooo_mem(i)
 			i++
 		:: i >= SLAB_SIZE -> break
 		od;
+#ifdef HAVE_OOO_CACHE_READ
 		RANDOM_CACHE_READ_FROM_MEM(urcu_gp_ctr, get_pid());
 		i = 0;
 		do
@@ -403,6 +424,9 @@ inline ooo_mem(i)
 			i++
 		:: i >= SLAB_SIZE -> break
 		od;
+#else
+		smp_rmb(i);
+#endif /* HAVE_OOO_CACHE_READ */
 	}
 }
 
@@ -419,8 +443,8 @@ int _proc_urcu_reader;
 #define READ_PROD_B_IF_FALSE		(1 << 2)
 #define READ_PROD_C_IF_TRUE_READ	(1 << 3)
 
-#define PROCEDURE_READ_LOCK(base, consumetoken, producetoken)				\
-	:: CONSUME_TOKENS(proc_urcu_reader, consumetoken, READ_PROD_A_READ << base) ->	\
+#define PROCEDURE_READ_LOCK(base, consumetoken, consumetoken2, producetoken)		\
+	:: CONSUME_TOKENS(proc_urcu_reader, (consumetoken | consumetoken2), READ_PROD_A_READ << base) ->	\
 		ooo_mem(i);								\
 		tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);		\
 		PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_A_READ << base);		\
@@ -434,13 +458,14 @@ int _proc_urcu_reader;
 			PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_B_IF_FALSE << base);	\
 		fi;									\
 	/* IF TRUE */									\
-	:: CONSUME_TOKENS(proc_urcu_reader, READ_PROD_B_IF_TRUE << base,		\
+	:: CONSUME_TOKENS(proc_urcu_reader, consumetoken, /* prefetch */		\
 			  READ_PROD_C_IF_TRUE_READ << base) ->				\
 		ooo_mem(i);								\
 		tmp2 = READ_CACHED_VAR(urcu_gp_ctr);					\
 		PRODUCE_TOKENS(proc_urcu_reader, READ_PROD_C_IF_TRUE_READ << base);	\
 	:: CONSUME_TOKENS(proc_urcu_reader,						\
-			  (READ_PROD_C_IF_TRUE_READ	/* pre-dominant */		\
+			  (READ_PROD_B_IF_TRUE						\
+			  | READ_PROD_C_IF_TRUE_READ	/* pre-dominant */		\
 			  | READ_PROD_A_READ) << base,		/* WAR */		\
 			  producetoken) ->						\
 		ooo_mem(i);								\
@@ -470,14 +495,14 @@ int _proc_urcu_reader;
 			  consumetoken,							\
 			  READ_PROC_READ_UNLOCK << base) ->				\
 		ooo_mem(i);								\
-		tmp2 = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);		\
+		tmp = READ_CACHED_VAR(urcu_active_readers[get_readerid()]);		\
 		PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_READ_UNLOCK << base);	\
 	:: CONSUME_TOKENS(proc_urcu_reader,						\
 			  consumetoken							\
 			  | (READ_PROC_READ_UNLOCK << base),	/* WAR */		\
 			  producetoken) ->						\
 		ooo_mem(i);								\
-		WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp2 - 1);	\
+		WRITE_CACHED_VAR(urcu_active_readers[get_readerid()], tmp - 1);		\
 		PRODUCE_TOKENS(proc_urcu_reader, producetoken);				\
 	skip
 
@@ -641,7 +666,6 @@ non_atomic3_end:
 					skip;
 				fi;
 			}
-		:: 1 -> skip;
 		fi;
 
 		goto non_atomic3_skip;
@@ -654,7 +678,7 @@ non_atomic3_skip:
 
 		atomic {
 			if
-			PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, READ_LOCK_OUT);
+			PROCEDURE_READ_LOCK(READ_LOCK_BASE, READ_PROD_NONE, 0, READ_LOCK_OUT);
 
 			:: CONSUME_TOKENS(proc_urcu_reader,
 					  READ_LOCK_OUT,		/* post-dominant */
@@ -662,7 +686,7 @@ non_atomic3_skip:
 				smp_mb_reader(i, j);
 				PRODUCE_TOKENS(proc_urcu_reader, READ_PROC_FIRST_MB);
 
-			PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB | READ_LOCK_OUT,
+			PROCEDURE_READ_LOCK(READ_LOCK_NESTED_BASE, READ_PROC_FIRST_MB, READ_LOCK_OUT,
 					    READ_LOCK_NESTED_OUT);
 
 			:: CONSUME_TOKENS(proc_urcu_reader,
@@ -720,12 +744,12 @@ rmb1_end:
 			/* reading urcu_active_readers, which have been written by
 			 * READ_UNLOCK_OUT : RAW */
 			PROCEDURE_READ_LOCK(READ_LOCK_UNROLL_BASE,
-					    READ_UNLOCK_OUT		/* RAW */
-					    | READ_PROC_SECOND_MB	/* mb() orders reads */
-					    | READ_PROC_FIRST_MB	/* mb() orders reads */
-					    | READ_LOCK_NESTED_OUT	/* RAW */
+					    READ_PROC_SECOND_MB		/* mb() orders reads */
+					    | READ_PROC_FIRST_MB,	/* mb() orders reads */
+					    READ_LOCK_NESTED_OUT	/* RAW */
 					    | READ_LOCK_OUT		/* RAW */
-					    | READ_UNLOCK_NESTED_OUT,	/* RAW */
+					    | READ_UNLOCK_NESTED_OUT	/* RAW */
+					    | READ_UNLOCK_OUT,		/* RAW */
 					    READ_LOCK_OUT_UNROLL);
 
 
@@ -810,14 +834,14 @@ rmb2_end:
 	goto end;
 rmb1:
 #ifndef NO_RMB
-	smp_rmb(i, j);
+	smp_rmb(i);
 #else
 	ooo_mem(i);
 #endif
 	goto rmb1_end;
 rmb2:
 #ifndef NO_RMB
-	smp_rmb(i, j);
+	smp_rmb(i);
 #else
 	ooo_mem(i);
 #endif
@@ -927,7 +951,7 @@ active proctype urcu_writer()
 	assert(get_pid() < NR_PROCS);
 
 	do
-	:: (loop_nr < 4) ->
+	:: (loop_nr < 3) ->
 #ifdef WRITER_PROGRESS
 progress_writer1:
 #endif
@@ -968,7 +992,7 @@ progress_writer1:
 		:: CONSUME_TOKENS(proc_urcu_writer,
 				  WRITE_DATA,
 				  WRITE_PROC_WMB) ->
-			smp_wmb(i, j);
+			smp_wmb(i);
 			PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_WMB);
 
 		:: CONSUME_TOKENS(proc_urcu_writer,
@@ -1003,10 +1027,11 @@ smp_mb_send1_end:
 			PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_FIRST_WRITE_GP);
 
 		:: CONSUME_TOKENS(proc_urcu_writer,
-				  //WRITE_PROC_FIRST_WRITE_GP	/* TEST ADDING SYNC CORE */
+				  //WRITE_PROC_FIRST_WRITE_GP |	/* TEST ADDING SYNC CORE */
 				  WRITE_PROC_FIRST_MB,	/* can be reordered before/after flips */
 				  WRITE_PROC_FIRST_WAIT | WRITE_PROC_FIRST_WAIT_LOOP) ->
 			ooo_mem(i);
+			//smp_mb(i);	/* TEST */
 			/* ONLY WAITING FOR READER 0 */
 			tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
 #ifndef SINGLE_FLIP
@@ -1034,6 +1059,12 @@ smp_mb_send1_end:
 #ifndef GEN_ERROR_WRITER_PROGRESS
 			goto smp_mb_send2;
 smp_mb_send2_end:
+			/* The memory barrier will invalidate the
+			 * second read done as prefetching. Note that all
+			 * instructions with side-effects depending on
+			 * WRITE_PROC_SECOND_READ_GP should also depend on
+			 * completion of this busy-waiting loop. */
+			CLEAR_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
 #else
 			ooo_mem(i);
 #endif
@@ -1042,17 +1073,21 @@ smp_mb_send2_end:
 
 		/* second flip */
 		:: CONSUME_TOKENS(proc_urcu_writer,
-				  WRITE_PROC_FIRST_WAIT		/* Control dependency : need to branch out of
-								 * the loop to execute the next flip (CHECK) */
-				  | WRITE_PROC_FIRST_WRITE_GP
+				  //WRITE_PROC_FIRST_WAIT |	//test	/* no dependency. Could pre-fetch, no side-effect. */
+				  WRITE_PROC_FIRST_WRITE_GP
 				  | WRITE_PROC_FIRST_READ_GP
 				  | WRITE_PROC_FIRST_MB,
 				  WRITE_PROC_SECOND_READ_GP) ->
 			ooo_mem(i);
+			//smp_mb(i);	/* TEST */
 			tmpa = READ_CACHED_VAR(urcu_gp_ctr);
 			PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_READ_GP);
 		:: CONSUME_TOKENS(proc_urcu_writer,
-				  WRITE_PROC_FIRST_MB
+				  WRITE_PROC_FIRST_WAIT			/* dependency on first wait, because this
+									 * instruction has globally observable
+									 * side-effects.
+									 */
+				  | WRITE_PROC_FIRST_MB
 				  | WRITE_PROC_WMB
 				  | WRITE_PROC_FIRST_READ_GP
 				  | WRITE_PROC_FIRST_WRITE_GP
@@ -1063,11 +1098,12 @@ smp_mb_send2_end:
 			PRODUCE_TOKENS(proc_urcu_writer, WRITE_PROC_SECOND_WRITE_GP);
 
 		:: CONSUME_TOKENS(proc_urcu_writer,
-				  //WRITE_PROC_FIRST_WRITE_GP	/* TEST ADDING SYNC CORE */
+				  //WRITE_PROC_FIRST_WRITE_GP |	/* TEST ADDING SYNC CORE */
 				  WRITE_PROC_FIRST_WAIT
 				  | WRITE_PROC_FIRST_MB,	/* can be reordered before/after flips */
 				  WRITE_PROC_SECOND_WAIT | WRITE_PROC_SECOND_WAIT_LOOP) ->
 			ooo_mem(i);
+			//smp_mb(i);	/* TEST */
 			/* ONLY WAITING FOR READER 0 */
 			tmp2 = READ_CACHED_VAR(urcu_active_readers[0]);
 			if
@@ -1079,7 +1115,7 @@ smp_mb_send2_end:
 			fi;
 
 		:: CONSUME_TOKENS(proc_urcu_writer,
-				  //WRITE_PROC_FIRST_WRITE_GP	/* TEST ADDING SYNC CORE */
+				  //WRITE_PROC_FIRST_WRITE_GP |	/* TEST ADDING SYNC CORE */
 				  WRITE_PROC_SECOND_WRITE_GP
 				  | WRITE_PROC_FIRST_WRITE_GP
 				  | WRITE_PROC_SECOND_READ_GP
@@ -1151,6 +1187,12 @@ end_writer:
 	:: 1 ->
 #ifdef WRITER_PROGRESS
 progress_writer2:
+#endif
+#ifdef READER_PROGRESS
+		/*
+		 * Make sure we don't block the reader's progress.
+		 */
+		smp_mb_send(i, j, 5);
 #endif
 		skip;
 	od;