+ rcu_thread_offline();
+ else
+ cmm_smp_mb();
+
+ /*
+ * Add ourselves to the gp_waiters stack of threads waiting for a
+ * grace period. Proceed to perform the grace period only if we are
+ * the first thread pushed onto the stack.
+ */
+ cds_wfs_node_init(&gp_waiters_thread.node);
+ urcu_wait_init(&gp_waiters_thread.wait);
+ if (cds_wfs_push(&gp_waiters, &gp_waiters_thread.node) != 0) {
+ /* Not first in stack: will be awakened by another thread. */
+ urcu_adaptative_busy_wait(&gp_waiters_thread.wait);
+ goto gp_end;
+ }
+
+ mutex_lock(&rcu_gp_lock);
+
+ /*
+ * Pop all waiters into our local stack head.
+ */
+ gp_waiters_head = __cds_wfs_pop_all(&gp_waiters);
+
+ if (cds_list_empty(&registry))
+ goto out;
+
+ /* Increment current G.P. */
+ CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+
+ /*
+ * Must commit rcu_gp_ctr update to memory before waiting for
+ * quiescent state. Failure to do so could result in the writer
+ * waiting forever while new readers are always accessing data
+ * (no progress). Enforce compiler-order of store to rcu_gp_ctr
+ * before load URCU_TLS(rcu_reader).ctr.
+ */
+ cmm_barrier();
+
+ /*
+ * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
+ * model easier to understand. It does not have a big performance impact
+ * anyway, given this is the write-side.
+ */
+ cmm_smp_mb();