- STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
-#endif /* !(BITS_PER_LONG < 64) */
+ CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+#endif /* !(CAA_BITS_PER_LONG < 64) */
+
+ /*
+ * Must commit rcu_gp_ctr update to memory before waiting for
+ * quiescent state. Failure to do so could result in the writer
+ * waiting forever while new readers are always accessing data
+ * (no progress). Enforce compiler-order of store to rcu_gp_ctr
+ * before load URCU_TLS(rcu_reader).ctr.
+ */
+ cmm_barrier();
+
+ /*
+ * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
+ * model easier to understand. It does not have a big performance impact
+ * anyway, given this is the write-side.
+ */
+ cmm_smp_mb();