From 83a2c42177d2b28890bc3674d30f9ed5f8edb0a8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 17 Aug 2011 05:42:51 -0400
Subject: [PATCH] urcu-qsbr: avoid useless futex wakeups and burning CPU for
 long grace periods

I noticed that urcu makes exactly _one_ attempt at using futexes to
avoid busy looping on synchronize_rcu.  The attached patch instead
switches from busy waiting to futexes after RCU_QS_ACTIVE_ATTEMPTS.
To limit the number of system calls, reading threads remember whether
they already had a quiescent state in this grace period; if so, they
were already removed from the list and can avoid signaling the futex.

Performance measured with rcutorture (nreaders: 10, nupdaters: 1,
duration: 10, median of nine runs):

    RCU_QS_ACTIVE_ATTEMPTS == 100, no patch      n_updates = 292
    RCU_QS_ACTIVE_ATTEMPTS == 1, no patch        n_updates = 290
    RCU_QS_ACTIVE_ATTEMPTS == 100, with patch    n_updates = 408
    RCU_QS_ACTIVE_ATTEMPTS == 1, with patch      n_updates = 404

(The first two cases are obviously the same; the only change is when
the futex is used, but over many calls there is no difference.)

This patch matches the update to the Promela model.

Signed-off-by: Paolo Bonzini
Signed-off-by: Mathieu Desnoyers
---
 urcu-qsbr.c             | 17 ++++++++++++-----
 urcu/static/urcu-qsbr.h |  7 ++++++-
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/urcu-qsbr.c b/urcu-qsbr.c
index a239be0..8d8a9cf 100644
--- a/urcu-qsbr.c
+++ b/urcu-qsbr.c
@@ -150,26 +150,33 @@ static void update_counter_and_wait(void)
 	 */
 	for (;;) {
 		wait_loops++;
-		if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS) {
-			uatomic_dec(&gp_futex);
+		if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
+			uatomic_set(&gp_futex, -1);
+			/*
+			 * Write futex before write waiting (the other side
+			 * reads them in the opposite order).
+			 */
+			cmm_smp_wmb();
+			cds_list_for_each_entry(index, &registry, node) {
+				_CMM_STORE_SHARED(index->waiting, 1);
+			}
 			/* Write futex before read reader_gp */
 			cmm_smp_mb();
 		}
-
 		cds_list_for_each_entry_safe(index, tmp, &registry, node) {
 			if (!rcu_gp_ongoing(&index->ctr))
 				cds_list_move(&index->node, &qsreaders);
 		}
 
 		if (cds_list_empty(&registry)) {
-			if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS) {
+			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
 				/* Read reader_gp before write futex */
 				cmm_smp_mb();
 				uatomic_set(&gp_futex, 0);
 			}
 			break;
 		} else {
-			if (wait_loops == RCU_QS_ACTIVE_ATTEMPTS) {
+			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
 				wait_gp();
 			} else {
 #ifndef HAS_INCOHERENT_CACHES
diff --git a/urcu/static/urcu-qsbr.h b/urcu/static/urcu-qsbr.h
index 5b7adac..489abb0 100644
--- a/urcu/static/urcu-qsbr.h
+++ b/urcu/static/urcu-qsbr.h
@@ -124,6 +124,7 @@ struct rcu_reader {
 	unsigned long ctr;
 	/* Data used for registry */
 	struct cds_list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
+	int waiting;
 	pthread_t tid;
 };
 
@@ -136,7 +137,11 @@ extern int32_t gp_futex;
  */
 static inline void wake_up_gp(void)
 {
-	if (unlikely(uatomic_read(&gp_futex) == -1)) {
+	if (unlikely(_CMM_LOAD_SHARED(rcu_reader.waiting))) {
+		_CMM_STORE_SHARED(rcu_reader.waiting, 0);
+		cmm_smp_mb();
+		if (uatomic_read(&gp_futex) != -1)
+			return;
 		uatomic_set(&gp_futex, 0);
 		futex_noasync(&gp_futex, FUTEX_WAKE, 1,
 			      NULL, NULL, 0);
-- 
2.34.1
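
Note (not part of the patch): the sketch below condenses the futex handshake
described in the commit message into standalone C, for readers who want to see
the protocol outside the urcu tree.  The names gp_futex, waiting, FUTEX_WAIT
and FUTEX_WAKE come from the patch; everything else is an illustrative
simplification and an assumption of this note, not upstream code: GCC __atomic
builtins stand in for uatomic/_CMM_*/cmm_* primitives, the reader registry is
reduced to a single reader, and the function names are invented.

/*
 * Condensed, illustrative sketch of the wait/wake protocol -- assumptions
 * as stated above, not the upstream urcu implementation.
 */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

static int32_t gp_futex;

static struct {
	int waiting;	/* set by the writer before it goes to sleep */
} rcu_reader;		/* single reader stands in for the whole registry */

static long sys_futex(int32_t *uaddr, int op, int32_t val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

/* Writer side: after RCU_QS_ACTIVE_ATTEMPTS spins with readers still active. */
static void writer_block_until_woken(void)
{
	__atomic_store_n(&gp_futex, -1, __ATOMIC_RELAXED);
	/* Write futex before write waiting (readers check them in reverse). */
	__atomic_thread_fence(__ATOMIC_RELEASE);
	__atomic_store_n(&rcu_reader.waiting, 1, __ATOMIC_RELAXED);
	/* Write futex/waiting before re-reading the reader state. */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
	/* ...re-scan the registry here; if a reader is still active: */
	sys_futex(&gp_futex, FUTEX_WAIT, -1);	/* sleeps only while gp_futex == -1 */
}

/* Reader side: called when this thread reports a quiescent state. */
static void reader_wake_writer(void)
{
	/* Only the first quiescent state per grace period pays for a syscall. */
	if (!__atomic_load_n(&rcu_reader.waiting, __ATOMIC_RELAXED))
		return;
	__atomic_store_n(&rcu_reader.waiting, 0, __ATOMIC_RELAXED);
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
	if (__atomic_load_n(&gp_futex, __ATOMIC_RELAXED) != -1)
		return;		/* writer is no longer sleeping on the futex */
	__atomic_store_n(&gp_futex, 0, __ATOMIC_RELAXED);
	sys_futex(&gp_futex, FUTEX_WAKE, 1);
}

The point of the per-reader waiting flag shows up in reader_wake_writer():
once a reader has signaled (or found the futex already reset) during the
current grace period, the flag is clear, so further quiescent states on that
reader cost one relaxed load instead of a futex system call.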