From f864c15d3148f14019e837544213314db0198ccb Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sun, 11 Nov 2012 11:20:07 -0500
Subject: [PATCH] urcu-qsbr: skip Q.S. reporting if already reported

We can skip both memory barriers and store reporting quiescent state if
we notice we already reported Q.S. for the current value of "rcu_gp_ctr".

It covers the two implementations of QSBR:

* 64-bit architecture: we assume the counter never overflows, and
  therefore only perform one increment followed by waiting for readers.
  In this scenario, we don't care if the rcu_gp_ctr load is moved into
  the prior read-side critical section, as long as the
  URCU_TLS(rcu_reader).ctr store is ordered.

* 32-bit architecture: given the 32-bit counter could overflow, we rely
  on a 2-phase approach, using a single bit: we flip the rcu_gp_ctr bit,
  then wait to observe that all readers have taken a copy of the new
  rcu_gp_ctr. We flip it again, and wait until we observe that all
  readers have copied its new value. We are then certain that each
  reader necessarily passed through a quiescent state during the grace
  period (and that Q.S. was not located prior to our grace period).
  This scheme works even if the rcu_gp_ctr load is moved into the prior
  read-side critical section, as long as store to
  URCU_TLS(rcu_reader).ctr is ordered with respect to other memory
  accesses within that thread.

Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 urcu/static/urcu-qsbr.h | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/urcu/static/urcu-qsbr.h b/urcu/static/urcu-qsbr.h
index c8a87b8..f314956 100644
--- a/urcu/static/urcu-qsbr.h
+++ b/urcu/static/urcu-qsbr.h
@@ -180,20 +180,43 @@ static inline void _rcu_read_unlock(void)
 {
 }
 
+/*
+ * This is a helper function for _rcu_quiescent_state().
+ * The first cmm_smp_mb() ensures memory accesses in the prior read-side
+ * critical sections are not reordered with store to
+ * URCU_TLS(rcu_reader).ctr, and ensures that mutexes held within an
+ * offline section that would happen to end with this
+ * rcu_quiescent_state() call are not reordered with
+ * store to URCU_TLS(rcu_reader).ctr.
+ */
+static inline void _rcu_quiescent_state_update_and_wakeup(unsigned long gp_ctr)
+{
+	cmm_smp_mb();
+	_CMM_STORE_SHARED(URCU_TLS(rcu_reader).ctr, gp_ctr);
+	cmm_smp_mb();	/* write URCU_TLS(rcu_reader).ctr before read futex */
+	wake_up_gp();
+	cmm_smp_mb();
+}
+
 /*
  * Inform RCU of a quiescent state.
  *
  * This function is less than 10 lines long. The intent is that this
  * function meets the 10-line criterion for LGPL, allowing this function
  * to be invoked directly from non-LGPL code.
+ *
+ * We skip the memory barriers and gp store if our local ctr already
+ * matches the global rcu_gp_ctr value: this is OK because a prior
+ * _rcu_quiescent_state() or _rcu_thread_online() already updated it
+ * within our thread, so we have no quiescent state to report.
  */
 static inline void _rcu_quiescent_state(void)
 {
-	cmm_smp_mb();
-	_CMM_STORE_SHARED(URCU_TLS(rcu_reader).ctr, _CMM_LOAD_SHARED(rcu_gp_ctr));
-	cmm_smp_mb();	/* write URCU_TLS(rcu_reader).ctr before read futex */
-	wake_up_gp();
-	cmm_smp_mb();
+	unsigned long gp_ctr;
+
+	if ((gp_ctr = CMM_LOAD_SHARED(rcu_gp_ctr)) == URCU_TLS(rcu_reader).ctr)
+		return;
+	_rcu_quiescent_state_update_and_wakeup(gp_ctr);
 }
 
 /*
-- 
2.34.1