X-Git-Url: https://git.liburcu.org/?a=blobdiff_plain;f=urcu-qsbr.c;h=7f747ed5f9ce8ef133df3c2d026b8b76154e2e9e;hb=6362f68f024fd85fefe342b1f82d8787146c1ebb;hp=5e4348449ca9183b61bf1cbc6a041a549f784c2b;hpb=71c811bf8db75c9502b295edb1e190f978682b65;p=urcu.git

diff --git a/urcu-qsbr.c b/urcu-qsbr.c
index 5e43484..7f747ed 100644
--- a/urcu-qsbr.c
+++ b/urcu-qsbr.c
@@ -35,10 +35,15 @@
 #include <errno.h>
 #include <poll.h>
 
-#include "urcu/wfqueue.h"
+#include "urcu/wfcqueue.h"
+#include "urcu/wfstack.h"
 #include "urcu/map/urcu-qsbr.h"
 #define BUILD_QSBR_LIB
 #include "urcu/static/urcu-qsbr.h"
+#include "urcu-pointer.h"
+#include "urcu/tls-compat.h"
+
+#include "urcu-die.h"
 
 /* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */
 #undef _LGPL_SOURCE
@@ -49,7 +54,7 @@ void __attribute__((destructor)) rcu_exit(void);
 
 static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
 
-int32_t gp_futex;
+int32_t rcu_gp_futex;
 
 /*
  * Global grace period counter.
@@ -65,32 +70,56 @@ unsigned long rcu_gp_ctr = RCU_GP_ONLINE;
 * Written to only by each individual reader. Read by both the reader and the
 * writers.
 */
-struct rcu_reader __thread rcu_reader;
+DEFINE_URCU_TLS(struct rcu_reader, rcu_reader);
 
 #ifdef DEBUG_YIELD
-unsigned int yield_active;
-unsigned int __thread rand_yield;
+unsigned int rcu_yield_active;
+DEFINE_URCU_TLS(unsigned int, rcu_rand_yield);
 #endif
 
 static CDS_LIST_HEAD(registry);
 
+/*
+ * Number of busy-loop attempts before waiting on futex for grace period
+ * batching.
+ */
+#define RCU_AWAKE_ATTEMPTS 1000
+
+enum adapt_wakeup_state {
+	/* AWAKE_WAITING is compared directly (futex compares it). */
+	AWAKE_WAITING = 0,
+	/* Non-zero values are used as masks. */
+	AWAKE_WAKEUP = (1 << 0),
+	AWAKE_AWAKENED = (1 << 1),
+	AWAKE_TEARDOWN = (1 << 2),
+};
+
+struct gp_waiters_thread {
+	struct cds_wfs_node node;
+	int32_t wait_futex;
+};
+
+/*
+ * Stack of threads awaiting a grace period. Contains
+ * struct gp_waiters_thread objects.
+ */
+static struct cds_wfs_stack gp_waiters = {
+	.head = CDS_WFS_END,
+	.lock = PTHREAD_MUTEX_INITIALIZER,
+};
+
 static void mutex_lock(pthread_mutex_t *mutex)
 {
 	int ret;
 
 #ifndef DISTRUST_SIGNALS_EXTREME
 	ret = pthread_mutex_lock(mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
+	if (ret)
+		urcu_die(ret);
#else /* #ifndef DISTRUST_SIGNALS_EXTREME */
 	while ((ret = pthread_mutex_trylock(mutex)) != 0) {
-		if (ret != EBUSY && ret != EINTR) {
-			printf("ret = %d, errno = %d\n", ret, errno);
-			perror("Error in pthread mutex lock");
-			exit(-1);
-		}
+		if (ret != EBUSY && ret != EINTR)
+			urcu_die(ret);
 		poll(NULL,0,10);
 	}
#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */
@@ -101,10 +130,8 @@ static void mutex_unlock(pthread_mutex_t *mutex)
 	int ret;
 
 	ret = pthread_mutex_unlock(mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
+	if (ret)
+		urcu_die(ret);
 }
 
 /*
@@ -114,69 +141,118 @@ static void wait_gp(void)
 {
 	/* Read reader_gp before read futex */
 	cmm_smp_rmb();
-	if (uatomic_read(&gp_futex) == -1)
-		futex_noasync(&gp_futex, FUTEX_WAIT, -1,
+	if (uatomic_read(&rcu_gp_futex) == -1)
+		futex_noasync(&rcu_gp_futex, FUTEX_WAIT, -1,
 			      NULL, NULL, 0);
 }
 
-static void update_counter_and_wait(void)
+/*
+ * Note: urcu_adaptative_wake_up needs "value" to stay allocated
+ * throughout its execution. In this scheme, the waiter owns the futex
+ * memory, and we only allow it to free this memory when it receives the
+ * AWAKE_TEARDOWN flag.
+ */
+static void urcu_adaptative_wake_up(int32_t *value)
 {
-	CDS_LIST_HEAD(qsreaders);
-	int wait_loops = 0;
-	struct rcu_reader *index, *tmp;
+	cmm_smp_mb();
+	assert(uatomic_read(value) == AWAKE_WAITING);
+	uatomic_set(value, AWAKE_WAKEUP);
+	if (!(uatomic_read(value) & AWAKE_AWAKENED))
+		futex_noasync(value, FUTEX_WAKE, 1, NULL, NULL, 0);
+	/* Allow teardown of "value" memory. */
+	uatomic_or(value, AWAKE_TEARDOWN);
+}
 
-#if (CAA_BITS_PER_LONG < 64)
-	/* Switch parity: 0 -> 1, 1 -> 0 */
-	CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
-#else /* !(CAA_BITS_PER_LONG < 64) */
-	/* Increment current G.P. */
-	CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
-#endif /* !(CAA_BITS_PER_LONG < 64) */
+/*
+ * Caller must initialize "value" to AWAKE_WAITING before passing its
+ * memory to waker thread.
+ */
+static void urcu_adaptative_busy_wait(int32_t *value)
+{
+	unsigned int i;
 
-	/*
-	 * Must commit rcu_gp_ctr update to memory before waiting for
-	 * quiescent state. Failure to do so could result in the writer
-	 * waiting forever while new readers are always accessing data
-	 * (no progress). Enforce compiler-order of store to rcu_gp_ctr
-	 * before load rcu_reader ctr.
-	 */
-	cmm_barrier();
+	/* Load and test condition before read futex */
+	cmm_smp_rmb();
+	for (i = 0; i < RCU_AWAKE_ATTEMPTS; i++) {
+		if (uatomic_read(value) != AWAKE_WAITING)
+			goto skip_futex_wait;
+		caa_cpu_relax();
+	}
+	futex_noasync(value, FUTEX_WAIT, AWAKE_WAITING, NULL, NULL, 0);
+skip_futex_wait:
+
+	/* Tell waker thread that we are awakened. */
+	uatomic_or(value, AWAKE_AWAKENED);
 
 	/*
-	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
-	 * model easier to understand. It does not have a big performance impact
-	 * anyway, given this is the write-side.
+	 * Wait until waker thread lets us know it's ok to tear down
+	 * memory allocated for value.
 	 */
-	cmm_smp_mb();
+	for (i = 0; i < RCU_AWAKE_ATTEMPTS; i++) {
+		if (uatomic_read(value) & AWAKE_TEARDOWN)
+			break;
+		caa_cpu_relax();
+	}
+	while (!(uatomic_read(value) & AWAKE_TEARDOWN))
+		poll(NULL, 0, 10);
+	assert(uatomic_read(value) & AWAKE_TEARDOWN);
+}
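Taken together, the two helpers above implement a four-state handshake: the waiter owns the futex word, spins up to RCU_AWAKE_ATTEMPTS times before sleeping on the futex, acknowledges the wake-up with AWAKE_AWAKENED, and may only reclaim the word once the waker has published AWAKE_TEARDOWN. A minimal stand-alone model of that handshake, purely illustrative, substituting C11 atomics and spinning for the uatomic, caa_cpu_relax() and futex_noasync() primitives used in the patch:

	/* Hypothetical model of the wake-up handshake (not part of urcu). */
	#include <pthread.h>
	#include <stdatomic.h>

	enum { WAITING = 0, WAKEUP = 1 << 0, AWAKENED = 1 << 1, TEARDOWN = 1 << 2 };

	static void *waker(void *arg)
	{
		atomic_int *value = arg;

		atomic_store(value, WAKEUP);		/* wake the waiter */
		atomic_fetch_or(value, TEARDOWN);	/* hand the word back */
		return NULL;
	}

	int main(void)
	{
		atomic_int value = WAITING;	/* waiter owns this memory */
		pthread_t t;

		pthread_create(&t, NULL, waker, &value);
		while (atomic_load(&value) == WAITING)
			;				/* adaptative wait */
		atomic_fetch_or(&value, AWAKENED);	/* acknowledge wake-up */
		while (!(atomic_load(&value) & TEARDOWN))
			;			/* wait until safe to reclaim */
		pthread_join(t, NULL);
		return 0;	/* "value" may now safely go out of scope */
	}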
+
+static void wait_for_readers(struct cds_list_head *input_readers,
+			struct cds_list_head *cur_snap_readers,
+			struct cds_list_head *qsreaders)
+{
+	int wait_loops = 0;
+	struct rcu_reader *index, *tmp;
 
 	/*
-	 * Wait for each thread rcu_reader_qs_gp count to become 0.
+	 * Wait for each thread's URCU_TLS(rcu_reader).ctr to either
+	 * indicate quiescence (offline), or to observe the current
+	 * rcu_gp_ctr value.
 	 */
 	for (;;) {
 		wait_loops++;
 		if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
-			uatomic_set(&gp_futex, -1);
+			uatomic_set(&rcu_gp_futex, -1);
 			/*
 			 * Write futex before write waiting (the other side
 			 * reads them in the opposite order).
 			 */
 			cmm_smp_wmb();
-			cds_list_for_each_entry(index, &registry, node) {
+			cds_list_for_each_entry(index, input_readers, node) {
 				_CMM_STORE_SHARED(index->waiting, 1);
 			}
 			/* Write futex before read reader_gp */
 			cmm_smp_mb();
 		}
-		cds_list_for_each_entry_safe(index, tmp, &registry, node) {
-			if (!rcu_gp_ongoing(&index->ctr))
-				cds_list_move(&index->node, &qsreaders);
+		cds_list_for_each_entry_safe(index, tmp, input_readers, node) {
+			switch (rcu_reader_state(&index->ctr)) {
+			case RCU_READER_ACTIVE_CURRENT:
+				if (cur_snap_readers) {
+					cds_list_move(&index->node,
+						cur_snap_readers);
+					break;
+				}
+				/* Fall-through */
+			case RCU_READER_INACTIVE:
+				cds_list_move(&index->node, qsreaders);
+				break;
+			case RCU_READER_ACTIVE_OLD:
+				/*
+				 * Old snapshot. Leaving node in
+				 * input_readers will make us busy-loop
+				 * until the snapshot becomes current or
+				 * the reader becomes inactive.
+				 */
+				break;
+			}
 		}
 
-		if (cds_list_empty(&registry)) {
+		if (cds_list_empty(input_readers)) {
 			if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
 				/* Read reader_gp before write futex */
 				cmm_smp_mb();
-				uatomic_set(&gp_futex, 0);
+				uatomic_set(&rcu_gp_futex, 0);
 			}
 			break;
 		} else {
@@ -191,8 +267,6 @@ static void update_counter_and_wait(void)
 		}
 	}
-	/* put back the reader list in the registry */
-	cds_list_splice(&qsreaders, &registry);
 }
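The switch above dispatches on rcu_reader_state(), pulled in through urcu/static/urcu-qsbr.h. As a rough sketch of the classification it performs (restated here for readability under the assumption that a reader's ctr is 0 while offline and snapshots rcu_gp_ctr while online; the enum tag rcu_state is a placeholder, the constants are the ones used in the switch, and this is not the header's exact code):

	/* Sketch: classify a reader from its ctr snapshot. */
	static enum rcu_state rcu_reader_state_sketch(unsigned long *ctr)
	{
		unsigned long v = CMM_LOAD_SHARED(*ctr);

		if (!v)
			return RCU_READER_INACTIVE;	/* offline: quiescent */
		if (v == CMM_LOAD_SHARED(rcu_gp_ctr))
			return RCU_READER_ACTIVE_CURRENT; /* saw current GP */
		return RCU_READER_ACTIVE_OLD;	/* old snapshot: keep polling */
	}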
 
 /*
@@ -203,41 +277,82 @@ static void update_counter_and_wait(void)
 #if (CAA_BITS_PER_LONG < 64)
 void synchronize_rcu(void)
 {
+	CDS_LIST_HEAD(cur_snap_readers);
+	CDS_LIST_HEAD(qsreaders);
 	unsigned long was_online;
+	struct gp_waiters_thread gp_waiters_thread;
+	struct cds_wfs_head *gp_waiters_head;
+	struct cds_wfs_node *waiters_iter, *waiters_iter_n;
 
-	was_online = rcu_reader.ctr;
+	was_online = URCU_TLS(rcu_reader).ctr;
 
 	/* All threads should read qparity before accessing data structure
-	 * where new ptr points to.
-	 */
-	/* Write new ptr before changing the qparity */
-	cmm_smp_mb();
-
-	/*
+	 * where new ptr points to.  In the "then" case, rcu_thread_offline
+	 * includes a memory barrier.
+	 *
 	 * Mark the writer thread offline to make sure we don't wait for
 	 * our own quiescent state. This allows using synchronize_rcu()
 	 * in threads registered as readers.
 	 */
 	if (was_online)
-		CMM_STORE_SHARED(rcu_reader.ctr, 0);
+		rcu_thread_offline();
+	else
+		cmm_smp_mb();
+
+	/*
+	 * Add ourselves to the gp_waiters stack of threads awaiting a
+	 * grace period. Proceed to perform the grace period only if we
+	 * are the first thread added into the stack.
+	 */
+	cds_wfs_node_init(&gp_waiters_thread.node);
+	gp_waiters_thread.wait_futex = AWAKE_WAITING;
+	if (cds_wfs_push(&gp_waiters, &gp_waiters_thread.node) != 0) {
+		/* Not first in stack: will be awakened by another thread. */
+		urcu_adaptative_busy_wait(&gp_waiters_thread.wait_futex);
+		goto gp_end;
+	}
 
 	mutex_lock(&rcu_gp_lock);
 
+	/*
+	 * Pop all waiters into our local stack head.
+	 */
+	gp_waiters_head = __cds_wfs_pop_all(&gp_waiters);
+
 	if (cds_list_empty(&registry))
 		goto out;
 
 	/*
-	 * Wait for previous parity to be empty of readers.
+	 * Wait for readers to observe original parity or be quiescent.
 	 */
-	update_counter_and_wait();	/* 0 -> 1, wait readers in parity 0 */
+	wait_for_readers(&registry, &cur_snap_readers, &qsreaders);
 
 	/*
-	 * Must finish waiting for quiescent state for parity 0 before
-	 * committing next rcu_gp_ctr update to memory. Failure to
-	 * do so could result in the writer waiting forever while new
+	 * Must finish waiting for quiescent state for original parity
+	 * before committing next rcu_gp_ctr update to memory. Failure
+	 * to do so could result in the writer waiting forever while new
 	 * readers are always accessing data (no progress). Enforce
-	 * compiler-order of load rcu_reader ctr before store to
-	 * rcu_gp_ctr.
+	 * compiler-order of load URCU_TLS(rcu_reader).ctr before store
+	 * to rcu_gp_ctr.
+	 */
+	cmm_barrier();
+
+	/*
+	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
+	 * model easier to understand. It does not have a big performance impact
+	 * anyway, given this is the write-side.
+	 */
+	cmm_smp_mb();
+
+	/* Switch parity: 0 -> 1, 1 -> 0 */
+	CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+
+	/*
+	 * Must commit rcu_gp_ctr update to memory before waiting for
+	 * quiescent state. Failure to do so could result in the writer
+	 * waiting forever while new readers are always accessing data
+	 * (no progress). Enforce compiler-order of store to rcu_gp_ctr
+	 * before load URCU_TLS(rcu_reader).ctr.
 	 */
 	cmm_barrier();
 
@@ -249,48 +364,131 @@ void synchronize_rcu(void)
 	cmm_smp_mb();
 
 	/*
-	 * Wait for previous parity to be empty of readers.
+	 * Wait for readers to observe new parity or be quiescent.
+	 */
+	wait_for_readers(&cur_snap_readers, NULL, &qsreaders);
+
+	/*
+	 * Put quiescent reader list back into registry.
 	 */
-	update_counter_and_wait();	/* 1 -> 0, wait readers in parity 1 */
+	cds_list_splice(&qsreaders, &registry);
 out:
 	mutex_unlock(&rcu_gp_lock);
 
+	/* Wake all waiters in our stack head, excluding ourselves. */
+	cds_wfs_for_each_blocking_safe(gp_waiters_head, waiters_iter,
+			waiters_iter_n) {
+		struct gp_waiters_thread *wt;
+
+		wt = caa_container_of(waiters_iter,
+				struct gp_waiters_thread, node);
+		if (wt == &gp_waiters_thread)
+			continue;
+		urcu_adaptative_wake_up(&wt->wait_futex);
+	}
+
+gp_end:
 	/*
 	 * Finish waiting for reader threads before letting the old ptr be
 	 * freed.
 	 */
 	if (was_online)
-		_CMM_STORE_SHARED(rcu_reader.ctr,
-				  CMM_LOAD_SHARED(rcu_gp_ctr));
-	cmm_smp_mb();
+		rcu_thread_online();
+	else
+		cmm_smp_mb();
 }
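Stripped of the memory barriers and counter manipulation, the grace period batching that this function (and its 64-bit sibling below) performs reduces to the following shape, a condensed sketch in which do_grace_period() is a hypothetical stand-in for the rcu_gp_ctr updates and wait_for_readers() calls:

	static void synchronize_rcu_batched_sketch(void)
	{
		struct gp_waiters_thread self;
		struct cds_wfs_head *head;
		struct cds_wfs_node *iter, *iter_n;

		cds_wfs_node_init(&self.node);
		self.wait_futex = AWAKE_WAITING;

		/* Non-empty stack: another thread already leads a batch. */
		if (cds_wfs_push(&gp_waiters, &self.node) != 0) {
			urcu_adaptative_busy_wait(&self.wait_futex);
			return;		/* the leader ran our grace period */
		}

		mutex_lock(&rcu_gp_lock);
		head = __cds_wfs_pop_all(&gp_waiters);	/* adopt followers */
		do_grace_period();	/* one grace period serves them all */
		mutex_unlock(&rcu_gp_lock);

		/* Release every adopted follower, excluding ourselves. */
		cds_wfs_for_each_blocking_safe(head, iter, iter_n) {
			struct gp_waiters_thread *wt;

			wt = caa_container_of(iter,
					struct gp_waiters_thread, node);
			if (wt != &self)
				urcu_adaptative_wake_up(&wt->wait_futex);
		}
	}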
#else /* !(CAA_BITS_PER_LONG < 64) */
 void synchronize_rcu(void)
 {
+	CDS_LIST_HEAD(qsreaders);
 	unsigned long was_online;
+	struct gp_waiters_thread gp_waiters_thread;
+	struct cds_wfs_head *gp_waiters_head;
+	struct cds_wfs_node *waiters_iter, *waiters_iter_n;
 
-	was_online = rcu_reader.ctr;
+	was_online = URCU_TLS(rcu_reader).ctr;
 
 	/*
 	 * Mark the writer thread offline to make sure we don't wait for
 	 * our own quiescent state. This allows using synchronize_rcu()
 	 * in threads registered as readers.
 	 */
-	cmm_smp_mb();
 	if (was_online)
-		CMM_STORE_SHARED(rcu_reader.ctr, 0);
+		rcu_thread_offline();
+	else
+		cmm_smp_mb();
+
+	/*
+	 * Add ourselves to the gp_waiters stack of threads awaiting a
+	 * grace period. Proceed to perform the grace period only if we
+	 * are the first thread added into the stack.
+	 */
+	cds_wfs_node_init(&gp_waiters_thread.node);
+	gp_waiters_thread.wait_futex = AWAKE_WAITING;
+	if (cds_wfs_push(&gp_waiters, &gp_waiters_thread.node) != 0) {
+		/* Not first in stack: will be awakened by another thread. */
+		urcu_adaptative_busy_wait(&gp_waiters_thread.wait_futex);
+		goto gp_end;
+	}
 
 	mutex_lock(&rcu_gp_lock);
+
+	/*
+	 * Pop all waiters into our local stack head.
+	 */
+	gp_waiters_head = __cds_wfs_pop_all(&gp_waiters);
+
 	if (cds_list_empty(&registry))
 		goto out;
-	update_counter_and_wait();
+
+	/* Increment current G.P. */
+	CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+
+	/*
+	 * Must commit rcu_gp_ctr update to memory before waiting for
+	 * quiescent state. Failure to do so could result in the writer
+	 * waiting forever while new readers are always accessing data
+	 * (no progress). Enforce compiler-order of store to rcu_gp_ctr
+	 * before load URCU_TLS(rcu_reader).ctr.
+	 */
+	cmm_barrier();
+
+	/*
+	 * Adding a cmm_smp_mb() which is _not_ formally required, but makes the
+	 * model easier to understand. It does not have a big performance impact
+	 * anyway, given this is the write-side.
+	 */
+	cmm_smp_mb();
+
+	/*
+	 * Wait for readers to observe new count or be quiescent.
+	 */
+	wait_for_readers(&registry, NULL, &qsreaders);
+
+	/*
+	 * Put quiescent reader list back into registry.
+	 */
+	cds_list_splice(&qsreaders, &registry);
 out:
 	mutex_unlock(&rcu_gp_lock);
 
+	/* Wake all waiters in our stack head, excluding ourselves. */
+	cds_wfs_for_each_blocking_safe(gp_waiters_head, waiters_iter,
+			waiters_iter_n) {
+		struct gp_waiters_thread *wt;
+
+		wt = caa_container_of(waiters_iter,
+				struct gp_waiters_thread, node);
+		if (wt == &gp_waiters_thread)
+			continue;
+		urcu_adaptative_wake_up(&wt->wait_futex);
+	}
+
+gp_end:
 	if (was_online)
-		_CMM_STORE_SHARED(rcu_reader.ctr,
-				  CMM_LOAD_SHARED(rcu_gp_ctr));
-	cmm_smp_mb();
+		rcu_thread_online();
+	else
+		cmm_smp_mb();
 }
#endif /* !(CAA_BITS_PER_LONG < 64) */
@@ -325,11 +523,11 @@ void rcu_thread_online(void)
 
 void rcu_register_thread(void)
 {
-	rcu_reader.tid = pthread_self();
-	assert(rcu_reader.ctr == 0);
+	URCU_TLS(rcu_reader).tid = pthread_self();
+	assert(URCU_TLS(rcu_reader).ctr == 0);
 
 	mutex_lock(&rcu_gp_lock);
-	cds_list_add(&rcu_reader.node, &registry);
+	cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
 	mutex_unlock(&rcu_gp_lock);
 	_rcu_thread_online();
 }
@@ -342,14 +540,20 @@ void rcu_unregister_thread(void)
 	 */
 	_rcu_thread_offline();
 	mutex_lock(&rcu_gp_lock);
-	cds_list_del(&rcu_reader.node);
+	cds_list_del(&URCU_TLS(rcu_reader).node);
 	mutex_unlock(&rcu_gp_lock);
 }
 
 void rcu_exit(void)
 {
-	assert(cds_list_empty(&registry));
+	/*
+	 * Assertion disabled because call_rcu threads are now rcu
+	 * readers, and left running at exit.
+	 * assert(cds_list_empty(&registry));
+	 */
 }
 
+DEFINE_RCU_FLAVOR(rcu_flavor);
+
 #include "urcu-call-rcu-impl.h"
 #include "urcu-defer-impl.h"
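For context, a minimal reader/writer pair consuming the resulting QSBR flavor (a hypothetical example program, not part of this patch; link with -lurcu-qsbr; under QSBR, rcu_read_lock()/rcu_read_unlock() are no-ops and every registered thread must announce its own quiescent states):

	#include <pthread.h>
	#include <stdlib.h>
	#include <urcu-qsbr.h>

	static int *shared;

	static void *reader(void *arg)
	{
		rcu_register_thread();		/* join the reader registry */
		for (int i = 0; i < 1000; i++) {
			rcu_read_lock();	/* no-op, documents intent */
			int *p = rcu_dereference(shared);
			if (p)
				(void) *p;	/* safe until next quiescent state */
			rcu_read_unlock();
			rcu_quiescent_state();	/* announce quiescence */
		}
		rcu_unregister_thread();
		return NULL;
	}

	int main(void)
	{
		pthread_t t;
		int *new = malloc(sizeof(*new)), *old;

		*new = 42;
		pthread_create(&t, NULL, reader, NULL);
		old = rcu_xchg_pointer(&shared, new);
		synchronize_rcu();	/* all pre-existing readers are done */
		free(old);		/* old value can no longer be observed */
		pthread_join(t, NULL);
		free(new);
		return 0;
	}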