The transitive dependency between:
RCU read-side C.S. -> synchronize_rcu -> rcu_gp_lock -> rcu_register_thread
and the dependency:
pthread_join -> waiting for thread completion
can block a thread on join, and thus has the side-effect of deadlocking
a thread doing a pthread_join while within a RCU read-side critical
section. This join would be waiting for completion of rcu_register_thread
or rcu_unregister_thread, which may never complete because the
rcu_gp_lock is held by synchronize_rcu executed from another thread.
One solution to fix this is to add a new lock, rcu_registry_lock. This
lock now protects the thread registry. It is released between iterations
on the registry by synchronize_rcu, thus allowing thread
registration/unregistration to complete even though synchronize_rcu is
waiting for RCU read-side critical sections to complete.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
CC: Eugene Ivanov <Eugene.Ivanov@orc-group.com>
CC: Lai Jiangshan <laijs@cn.fujitsu.com>
CC: Stephen Hemminger <stephen@networkplumber.org>
static
void __attribute__((destructor)) _rcu_bp_exit(void);
static
void __attribute__((destructor)) _rcu_bp_exit(void);
+/*
+ * rcu_gp_lock ensures mutual exclusion between threads calling
+ * synchronize_rcu().
+ */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
+/*
+ * rcu_registry_lock ensures mutual exclusion between threads
+ * registering and unregistering themselves to/from the registry, and
+ * with threads reading that registry from synchronize_rcu(). However,
+ * this lock is not held all the way through the completion of the
+ * grace period wait. It is sporadically released between iterations
+ * on the registry.
+ * rcu_registry_lock may nest inside rcu_gp_lock.
+ */
+static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static int initialized;
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static int initialized;
+/*
+ * Always called with rcu_registry_lock held. Releases this lock between
+ * iterations and grabs it again. Holds the lock when it returns.
+ */
void update_counter_and_wait(void)
{
CDS_LIST_HEAD(qsreaders);
void update_counter_and_wait(void)
{
CDS_LIST_HEAD(qsreaders);
if (cds_list_empty(&registry)) {
break;
} else {
if (cds_list_empty(&registry)) {
break;
} else {
+ /* Temporarily unlock the registry lock. */
+ mutex_unlock(&rcu_registry_lock);
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
(void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
else
caa_cpu_relax();
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
(void) poll(NULL, 0, RCU_SLEEP_DELAY_MS);
else
caa_cpu_relax();
+ /* Re-lock the registry lock before the next loop. */
+ mutex_lock(&rcu_registry_lock);
}
}
/* put back the reader list in the registry */
}
}
/* put back the reader list in the registry */
assert(!ret);
mutex_lock(&rcu_gp_lock);
assert(!ret);
mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
if (cds_list_empty(&registry))
goto out;
if (cds_list_empty(&registry))
goto out;
/*
* Wait for previous parity to be empty of readers.
/*
* Wait for previous parity to be empty of readers.
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
*/
update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
*/
update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
/*
* Wait for previous parity to be empty of readers.
/*
* Wait for previous parity to be empty of readers.
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
*/
update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
*/
update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
+ mutex_unlock(&rcu_registry_lock);
mutex_unlock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
assert(!ret);
mutex_unlock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
assert(!ret);
- mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
- mutex_unlock(&rcu_gp_lock);
+ mutex_unlock(&rcu_registry_lock);
end:
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
if (ret)
end:
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
if (ret)
- mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
remove_thread(rcu_reader_reg);
remove_thread(rcu_reader_reg);
- mutex_unlock(&rcu_gp_lock);
+ mutex_unlock(&rcu_registry_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
if (ret)
abort();
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
if (ret)
abort();
- * Holding the rcu_gp_lock across fork will make sure we fork() don't race with
- * a concurrent thread executing with this same lock held. This ensures that the
- * registry is in a coherent state in the child.
+ * Holding the rcu_gp_lock and rcu_registry_lock across fork will make
+ * sure we don't race with a concurrent thread executing with
+ * any of those locks held. This ensures that the registry and data
+ * protected by rcu_gp_lock are in a coherent state in the child.
*/
void rcu_bp_before_fork(void)
{
*/
void rcu_bp_before_fork(void)
{
ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
assert(!ret);
mutex_lock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
assert(!ret);
mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
saved_fork_signal_mask = oldmask;
}
saved_fork_signal_mask = oldmask;
}
int ret;
oldmask = saved_fork_signal_mask;
int ret;
oldmask = saved_fork_signal_mask;
+ mutex_unlock(&rcu_registry_lock);
mutex_unlock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
assert(!ret);
mutex_unlock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
assert(!ret);
/*
* Prune all entries from registry except our own thread. Fits the Linux
/*
* Prune all entries from registry except our own thread. Fits the Linux
- * fork behavior. Called with rcu_gp_lock held.
+ * fork behavior. Called with rcu_gp_lock and rcu_registry_lock held.
*/
static
void urcu_bp_prune_registry(void)
*/
static
void urcu_bp_prune_registry(void)
urcu_bp_prune_registry();
oldmask = saved_fork_signal_mask;
urcu_bp_prune_registry();
oldmask = saved_fork_signal_mask;
+ mutex_unlock(&rcu_registry_lock);
mutex_unlock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
assert(!ret);
mutex_unlock(&rcu_gp_lock);
ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
assert(!ret);
void __attribute__((destructor)) rcu_exit(void);
void __attribute__((destructor)) rcu_exit(void);
+/*
+ * rcu_gp_lock ensures mutual exclusion between threads calling
+ * synchronize_rcu().
+ */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
+/*
+ * rcu_registry_lock ensures mutual exclusion between threads
+ * registering and unregistering themselves to/from the registry, and
+ * with threads reading that registry from synchronize_rcu(). However,
+ * this lock is not held all the way through the completion of the
+ * grace period wait. It is sporadically released between iterations
+ * on the registry.
+ * rcu_registry_lock may nest inside rcu_gp_lock.
+ */
+static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
+/*
+ * Always called with rcu_registry_lock held. Releases this lock between
+ * iterations and grabs it again. Holds the lock when it returns.
+ */
static void update_counter_and_wait(void)
{
CDS_LIST_HEAD(qsreaders);
static void update_counter_and_wait(void)
{
CDS_LIST_HEAD(qsreaders);
+ /* Temporarily unlock the registry lock. */
+ mutex_unlock(&rcu_registry_lock);
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
wait_gp();
} else {
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
wait_gp();
} else {
cmm_smp_mb();
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
}
cmm_smp_mb();
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
}
+ /* Re-lock the registry lock before the next loop. */
+ mutex_lock(&rcu_registry_lock);
}
}
/* put back the reader list in the registry */
}
}
/* put back the reader list in the registry */
cmm_smp_mb();
mutex_lock(&rcu_gp_lock);
cmm_smp_mb();
mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
if (cds_list_empty(&registry))
goto out;
/*
* Wait for previous parity to be empty of readers.
if (cds_list_empty(&registry))
goto out;
/*
* Wait for previous parity to be empty of readers.
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
*/
update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
*/
update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
/*
* Wait for previous parity to be empty of readers.
/*
* Wait for previous parity to be empty of readers.
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
*/
update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
out:
*/
update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
out:
+ mutex_unlock(&rcu_registry_lock);
mutex_unlock(&rcu_gp_lock);
/*
mutex_unlock(&rcu_gp_lock);
/*
cmm_smp_mb();
mutex_lock(&rcu_gp_lock);
cmm_smp_mb();
mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
if (cds_list_empty(&registry))
goto out;
if (cds_list_empty(&registry))
goto out;
+ /*
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
+ */
update_counter_and_wait();
out:
update_counter_and_wait();
out:
+ mutex_unlock(&rcu_registry_lock);
mutex_unlock(&rcu_gp_lock);
if (was_online)
mutex_unlock(&rcu_gp_lock);
if (was_online)
URCU_TLS(rcu_reader).tid = pthread_self();
assert(URCU_TLS(rcu_reader).ctr == 0);
URCU_TLS(rcu_reader).tid = pthread_self();
assert(URCU_TLS(rcu_reader).ctr == 0);
- mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
- mutex_unlock(&rcu_gp_lock);
+ mutex_unlock(&rcu_registry_lock);
* with a waiting writer.
*/
_rcu_thread_offline();
* with a waiting writer.
*/
_rcu_thread_offline();
- mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
cds_list_del(&URCU_TLS(rcu_reader).node);
cds_list_del(&URCU_TLS(rcu_reader).node);
- mutex_unlock(&rcu_gp_lock);
+ mutex_unlock(&rcu_registry_lock);
void __attribute__((destructor)) rcu_exit(void);
#endif
void __attribute__((destructor)) rcu_exit(void);
#endif
+/*
+ * rcu_gp_lock ensures mutual exclusion between threads calling
+ * synchronize_rcu().
+ */
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER;
+/*
+ * rcu_registry_lock ensures mutual exclusion between threads
+ * registering and unregistering themselves to/from the registry, and
+ * with threads reading that registry from synchronize_rcu(). However,
+ * this lock is not held all the way through the completion of the
+ * grace period wait. It is sporadically released between iterations
+ * on the registry.
+ * rcu_registry_lock may nest inside rcu_gp_lock.
+ */
+static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER;
* Written to only by writer with mutex taken. Read by both writer and readers.
*/
unsigned long rcu_gp_ctr = RCU_GP_COUNT;
* Written to only by writer with mutex taken. Read by both writer and readers.
*/
unsigned long rcu_gp_ctr = RCU_GP_COUNT;
/*
* Written to only by each individual reader. Read by both the reader and the
* writers.
/*
* Written to only by each individual reader. Read by both the reader and the
* writers.
+/*
+ * Always called with rcu_registry_lock held. Releases this lock between
+ * iterations and grabs it again. Holds the lock when it returns.
+ */
void update_counter_and_wait(void)
{
CDS_LIST_HEAD(qsreaders);
void update_counter_and_wait(void)
{
CDS_LIST_HEAD(qsreaders);
+ /* Temporarily unlock the registry lock. */
+ mutex_unlock(&rcu_registry_lock);
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
wait_gp();
else
caa_cpu_relax();
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS)
wait_gp();
else
caa_cpu_relax();
+ /* Re-lock the registry lock before the next loop. */
+ mutex_lock(&rcu_registry_lock);
}
#else /* #ifndef HAS_INCOHERENT_CACHES */
/*
}
#else /* #ifndef HAS_INCOHERENT_CACHES */
/*
smp_mb_master(RCU_MB_GROUP);
wait_gp_loops = 0;
}
smp_mb_master(RCU_MB_GROUP);
wait_gp_loops = 0;
}
+ /* Temporarily unlock the registry lock. */
+ mutex_unlock(&rcu_registry_lock);
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
wait_gp();
wait_gp_loops++;
} else {
caa_cpu_relax();
}
if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) {
wait_gp();
wait_gp_loops++;
} else {
caa_cpu_relax();
}
+ /* Re-lock the registry lock before the next loop. */
+ mutex_lock(&rcu_registry_lock);
}
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
}
}
#endif /* #else #ifndef HAS_INCOHERENT_CACHES */
}
void synchronize_rcu(void)
{
mutex_lock(&rcu_gp_lock);
void synchronize_rcu(void)
{
mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
if (cds_list_empty(&registry))
goto out;
if (cds_list_empty(&registry))
goto out;
- /* All threads should read qparity before accessing data structure
- * where new ptr points to. Must be done within rcu_gp_lock because it
- * iterates on reader threads.*/
+ /*
+ * All threads should read qparity before accessing data structure
+ * where new ptr points to. Must be done within rcu_registry_lock
+ * because it iterates on reader threads.
+ */
/* Write new ptr before changing the qparity */
smp_mb_master(RCU_MB_GROUP);
/*
* Wait for previous parity to be empty of readers.
/* Write new ptr before changing the qparity */
smp_mb_master(RCU_MB_GROUP);
/*
* Wait for previous parity to be empty of readers.
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
*/
update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
*/
update_counter_and_wait(); /* 0 -> 1, wait readers in parity 0 */
/*
* Wait for previous parity to be empty of readers.
/*
* Wait for previous parity to be empty of readers.
+ * update_counter_and_wait() can release and grab again
+ * rcu_registry_lock internally.
*/
update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
*/
update_counter_and_wait(); /* 1 -> 0, wait readers in parity 1 */
- /* Finish waiting for reader threads before letting the old ptr being
- * freed. Must be done within rcu_gp_lock because it iterates on reader
- * threads. */
+ /*
+ * Finish waiting for reader threads before letting the old ptr
+ * being freed. Must be done within rcu_registry_lock because it
+ * iterates on reader threads.
+ */
smp_mb_master(RCU_MB_GROUP);
out:
smp_mb_master(RCU_MB_GROUP);
out:
+ mutex_unlock(&rcu_registry_lock);
mutex_unlock(&rcu_gp_lock);
}
mutex_unlock(&rcu_gp_lock);
}
assert(URCU_TLS(rcu_reader).need_mb == 0);
assert(!(URCU_TLS(rcu_reader).ctr & RCU_GP_CTR_NEST_MASK));
assert(URCU_TLS(rcu_reader).need_mb == 0);
assert(!(URCU_TLS(rcu_reader).ctr & RCU_GP_CTR_NEST_MASK));
- mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
rcu_init(); /* In case gcc does not support constructor attribute */
cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
rcu_init(); /* In case gcc does not support constructor attribute */
cds_list_add(&URCU_TLS(rcu_reader).node, &registry);
- mutex_unlock(&rcu_gp_lock);
+ mutex_unlock(&rcu_registry_lock);
}
void rcu_unregister_thread(void)
{
}
void rcu_unregister_thread(void)
{
- mutex_lock(&rcu_gp_lock);
+ mutex_lock(&rcu_registry_lock);
cds_list_del(&URCU_TLS(rcu_reader).node);
cds_list_del(&URCU_TLS(rcu_reader).node);
- mutex_unlock(&rcu_gp_lock);
+ mutex_unlock(&rcu_registry_lock);
* rcu_init constructor. Called when the library is linked, but also when
* reader threads are calling rcu_register_thread().
* Should only be called by a single thread at a given time. This is ensured by
* rcu_init constructor. Called when the library is linked, but also when
* reader threads are calling rcu_register_thread().
* Should only be called by a single thread at a given time. This is ensured by
- * holing the rcu_gp_lock from rcu_register_thread() or by running at library
- * load time, which should not be executed by multiple threads nor concurrently
- * with rcu_register_thread() anyway.
+ * holding the rcu_registry_lock from rcu_register_thread() or by running
+ * at library load time, which should not be executed by multiple
+ * threads nor concurrently with rcu_register_thread() anyway.