Fix: auto-resize hash table destroy deadlock
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Fri, 3 Feb 2023 19:11:50 +0000 (14:11 -0500)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Fri, 10 Feb 2023 17:12:06 +0000 (12:12 -0500)
Fix a deadlock for auto-resize hash tables when cds_lfht_destroy
is called with RCU read-side lock held.

Example stack track of a hang:

  Thread 2 (Thread 0x7f21ba876700 (LWP 26114)):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x00007f21beba7aa0 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x7f21bedac308 <urcu_memb_gp+8>) at ../include/urcu/futex.h:81
  #2  futex_noasync (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x7f21bedac308 <urcu_memb_gp+8>) at ../include/urcu/futex.h:90
  #3  wait_gp () at urcu.c:265
  #4  wait_for_readers (input_readers=input_readers@entry=0x7f21ba8751b0, cur_snap_readers=cur_snap_readers@entry=0x0,
      qsreaders=qsreaders@entry=0x7f21ba8751c0) at urcu.c:357
  #5  0x00007f21beba8339 in urcu_memb_synchronize_rcu () at urcu.c:498
  #6  0x00007f21be99f93f in fini_table (last_order=<optimized out>, first_order=13, ht=0x5651cec75400) at rculfhash.c:1489
  #7  _do_cds_lfht_shrink (new_size=<optimized out>, old_size=<optimized out>, ht=0x5651cec75400) at rculfhash.c:2001
  #8  _do_cds_lfht_resize (ht=ht@entry=0x5651cec75400) at rculfhash.c:2023
  #9  0x00007f21be99fa26 in do_resize_cb (work=0x5651e20621a0) at rculfhash.c:2063
  #10 0x00007f21be99dbfd in workqueue_thread (arg=0x5651cec74a00) at workqueue.c:234
  #11 0x00007f21bd7c06db in start_thread (arg=0x7f21ba876700) at pthread_create.c:463
  #12 0x00007f21bd4e961f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

  Thread 1 (Thread 0x7f21bf285300 (LWP 26098)):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x00007f21be99d8b7 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:81
  #2  futex_async (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:113
  #3  futex_wait (futex=futex@entry=0x5651d8b38584) at workqueue.c:135
  #4  0x00007f21be99e2c8 in urcu_workqueue_wait_completion (completion=completion@entry=0x5651d8b38580) at workqueue.c:423
  #5  0x00007f21be99e3f9 in urcu_workqueue_flush_queued_work (workqueue=0x5651cec74a00) at workqueue.c:452
  #6  0x00007f21be9a0c83 in cds_lfht_destroy (ht=0x5651d8b2fcf0, attr=attr@entry=0x0) at rculfhash.c:1906

This deadlock is easy to reproduce when rapidly adding a large number of
entries in the cds_lfht, removing them, and calling cds_lfht_destroy().

The deadlock will occur if the call to cds_lfht_destroy() takes place
while a resize of the hash table is ongoing.

Fix this by moving the teardown of the lfht worker thread to libcds
library destructor, so it does not have to wait on synchronize_rcu from
a resize callback from within a read-side critical section. As a
consequence, the atfork callbacks are left registered within each urcu
flavor for which a resizeable hash table is created until the end of the
executable lifetime.

The other part of the fix is to move the hash table destruction to the
worker thread for auto-resize hash tables. This prevents having to wait
for resize callbacks from RCU read-side critical section. This is
guaranteed by the fact that the worker thread serializes previously
queued resize callbacks before the destroy callback.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: If8b1c3c8063dc7b9846dc5c3fc452efd917eab4d

include/urcu/rculfhash.h
src/rculfhash-internal.h
src/rculfhash.c
src/urcu-call-rcu-impl.h

index 29dd88f1f16b0ea320ffa82ef2ed0596c7aaba65..c18cf5290763dde5f86efe101adf0c1af72f8857 100644 (file)
@@ -237,16 +237,16 @@ struct cds_lfht *cds_lfht_new(unsigned long init_size,
  *
  * Return 0 on success, negative error value on error.
 
+ * Threads calling this API need to be registered RCU read-side threads.
+ *
  * Prior to liburcu 0.10:
- * - Threads calling this API need to be registered RCU read-side
- *   threads.
  * - cds_lfht_destroy should *not* be called from a RCU read-side
  *   critical section. It should *not* be called from a call_rcu thread
  *   context neither.
  *
  * Starting from liburcu 0.10, rculfhash implements its own worker
- * thread to handle resize operations, which removes RCU requirements on
- * cds_lfht_destroy.
+ * thread to handle resize operations, which removes the above RCU
+ * read-side critical section requirement on cds_lfht_destroy.
  */
 extern
 int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr);
index d29a9232c03e6c42c844d8bfaaf3f63e588c5473..68c4374695a039fed7230e8a21e46b4f2048b57e 100644 (file)
@@ -29,6 +29,8 @@
 #include <stdlib.h>
 #include <assert.h>
 
+#include "workqueue.h"
+
 #ifdef DEBUG
 #define dbg_printf(fmt, args...)     printf("[debug rculfhash] " fmt, ## args)
 #else
@@ -82,11 +84,13 @@ struct cds_lfht {
         * therefore cause grace-period deadlock if we hold off RCU G.P.
         * completion.
         */
-       pthread_mutex_t resize_mutex;   /* resize mutex: add/del mutex */
-       pthread_attr_t *resize_attr;    /* Resize threads attributes */
+       pthread_mutex_t resize_mutex;           /* resize mutex: add/del mutex */
+       pthread_attr_t *caller_resize_attr;     /* resize threads attributes from lfht_new caller */
+       pthread_attr_t resize_attr;
        unsigned int in_progress_destroy;
        unsigned long resize_target;
        int resize_initiated;
+       struct urcu_work destroy_work;
 
        /*
         * Variables needed for add and remove fast-paths.
index 5b62e05a8ba6f5134036300ef168353beeaab67a..93b0f2c66c9ae02b58a58bbbc741aa2bfaa9d7d3 100644 (file)
@@ -363,7 +363,6 @@ struct partition_resize_work {
 };
 
 static struct urcu_workqueue *cds_lfht_workqueue;
-static unsigned long cds_lfht_workqueue_user_count;
 
 /*
  * Mutex ensuring mutual exclusion between workqueue initialization and
@@ -380,8 +379,8 @@ static struct urcu_atfork cds_lfht_atfork;
  */
 static int cds_lfht_workqueue_atfork_nesting;
 
+static void __attribute__((destructor)) cds_lfht_exit(void);
 static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor);
-static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor);
 
 #ifdef CONFIG_CDS_LFHT_ITER_DEBUG
 
@@ -1273,7 +1272,8 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
                work[thread].len = partition_len;
                work[thread].start = thread * partition_len;
                work[thread].fct = fct;
-               ret = pthread_create(&(work[thread].thread_id), ht->resize_attr,
+               ret = pthread_create(&(work[thread].thread_id),
+                       ht->caller_resize_attr ? &ht->resize_attr : NULL,
                        partition_resize_thread, &work[thread]);
                if (ret == EAGAIN) {
                        /*
@@ -1620,7 +1620,9 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
 
        ht->flags = flags;
        ht->flavor = flavor;
-       ht->resize_attr = attr;
+       ht->caller_resize_attr = attr;
+       if (attr)
+               ht->resize_attr = *attr;
        alloc_split_items_count(ht);
        /* this mutex should not nest in read-side C.S. */
        pthread_mutex_init(&ht->resize_mutex, NULL);
@@ -1834,6 +1836,35 @@ int cds_lfht_is_node_deleted(const struct cds_lfht_node *node)
        return is_removed(CMM_LOAD_SHARED(node->next));
 }
 
+static
+bool cds_lfht_is_empty(struct cds_lfht *ht)
+{
+       struct cds_lfht_node *node, *next;
+       bool empty = true;
+       bool was_online;
+
+       was_online = ht->flavor->read_ongoing();
+       if (!was_online) {
+               ht->flavor->thread_online();
+               ht->flavor->read_lock();
+       }
+       /* Check that the table is empty */
+       node = bucket_at(ht, 0);
+       do {
+               next = rcu_dereference(node->next);
+               if (!is_bucket(next)) {
+                       empty = false;
+                       break;
+               }
+               node = clear_flag(next);
+       } while (!is_end(node));
+       if (!was_online) {
+               ht->flavor->read_unlock();
+               ht->flavor->thread_offline();
+       }
+       return empty;
+}
+
 static
 int cds_lfht_delete_bucket(struct cds_lfht *ht)
 {
@@ -1868,6 +1899,24 @@ int cds_lfht_delete_bucket(struct cds_lfht *ht)
        return 0;
 }
 
+static
+void do_auto_resize_destroy_cb(struct urcu_work *work)
+{
+       struct cds_lfht *ht = caa_container_of(work, struct cds_lfht, destroy_work);
+       int ret;
+
+       ht->flavor->register_thread();
+       ret = cds_lfht_delete_bucket(ht);
+       if (ret)
+               urcu_die(ret);
+       free_split_items_count(ht);
+       ret = pthread_mutex_destroy(&ht->resize_mutex);
+       if (ret)
+               urcu_die(ret);
+       ht->flavor->unregister_thread();
+       poison_free(ht);
+}
+
 /*
  * Should only be called when no more concurrent readers nor writers can
  * possibly access the table.
@@ -1877,22 +1926,38 @@ int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr)
        int ret;
 
        if (ht->flags & CDS_LFHT_AUTO_RESIZE) {
+               /*
+                * Perform error-checking for emptiness before queuing
+                * work, so we can return error to the caller. This runs
+                * concurrently with ongoing resize.
+                */
+               if (!cds_lfht_is_empty(ht))
+                       return -EPERM;
                /* Cancel ongoing resize operations. */
                _CMM_STORE_SHARED(ht->in_progress_destroy, 1);
-               /* Wait for in-flight resize operations to complete */
-               urcu_workqueue_flush_queued_work(cds_lfht_workqueue);
+               if (attr) {
+                       *attr = ht->caller_resize_attr;
+                       ht->caller_resize_attr = NULL;
+               }
+               /*
+                * Queue destroy work after prior queued resize
+                * operations. Given there are no concurrent writers
+                * accessing the hash table at this point, no resize
+                * operations can be queued after this destroy work.
+                */
+               urcu_workqueue_queue_work(cds_lfht_workqueue,
+                       &ht->destroy_work, do_auto_resize_destroy_cb);
+               return 0;
        }
        ret = cds_lfht_delete_bucket(ht);
        if (ret)
                return ret;
        free_split_items_count(ht);
        if (attr)
-               *attr = ht->resize_attr;
+               *attr = ht->caller_resize_attr;
        ret = pthread_mutex_destroy(&ht->resize_mutex);
        if (ret)
                ret = -EBUSY;
-       if (ht->flags & CDS_LFHT_AUTO_RESIZE)
-               cds_lfht_fini_worker(ht->flavor);
        poison_free(ht);
        return ret;
 }
@@ -2179,23 +2244,19 @@ static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor)
        flavor->register_rculfhash_atfork(&cds_lfht_atfork);
 
        mutex_lock(&cds_lfht_fork_mutex);
-       if (cds_lfht_workqueue_user_count++)
-               goto end;
-       cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL,
-               NULL, cds_lfht_worker_init, NULL, NULL, NULL, NULL, NULL);
-end:
+       if (!cds_lfht_workqueue)
+               cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL,
+                       NULL, cds_lfht_worker_init, NULL, NULL, NULL, NULL, NULL);
        mutex_unlock(&cds_lfht_fork_mutex);
 }
 
-static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor)
+static void cds_lfht_exit(void)
 {
        mutex_lock(&cds_lfht_fork_mutex);
-       if (--cds_lfht_workqueue_user_count)
-               goto end;
-       urcu_workqueue_destroy(cds_lfht_workqueue);
-       cds_lfht_workqueue = NULL;
-end:
+       if (cds_lfht_workqueue) {
+               urcu_workqueue_flush_queued_work(cds_lfht_workqueue);
+               urcu_workqueue_destroy(cds_lfht_workqueue);
+               cds_lfht_workqueue = NULL;
+       }
        mutex_unlock(&cds_lfht_fork_mutex);
-
-       flavor->unregister_rculfhash_atfork(&cds_lfht_atfork);
 }
index 5d3ca4bccf2e91fd81a1400cb1add2a7c3ee9019..6301d135930a0da30f71e1ec21aa40585db40210 100644 (file)
@@ -102,7 +102,6 @@ static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
 static struct call_rcu_data *default_call_rcu_data;
 
 static struct urcu_atfork *registered_rculfhash_atfork;
-static unsigned long registered_rculfhash_atfork_refcount;
 
 /*
  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
@@ -1060,24 +1059,25 @@ void alias_call_rcu_after_fork_child();
 
 void urcu_register_rculfhash_atfork(struct urcu_atfork *atfork)
 {
+       if (CMM_LOAD_SHARED(registered_rculfhash_atfork))
+               return;
        call_rcu_lock(&call_rcu_mutex);
-       if (registered_rculfhash_atfork_refcount++)
-               goto end;
-       registered_rculfhash_atfork = atfork;
-end:
+       if (!registered_rculfhash_atfork)
+               registered_rculfhash_atfork = atfork;
        call_rcu_unlock(&call_rcu_mutex);
 }
 URCU_ATTR_ALIAS(urcu_stringify(urcu_register_rculfhash_atfork))
 void alias_urcu_register_rculfhash_atfork();
 
+/*
+ * This unregistration function is deprecated, meant only for internal
+ * use by rculfhash.
+ */
+__attribute__((noreturn))
 void urcu_unregister_rculfhash_atfork(struct urcu_atfork *atfork __attribute__((unused)))
 {
-       call_rcu_lock(&call_rcu_mutex);
-       if (--registered_rculfhash_atfork_refcount)
-               goto end;
-       registered_rculfhash_atfork = NULL;
-end:
-       call_rcu_unlock(&call_rcu_mutex);
+       urcu_die(EPERM);
 }
 URCU_ATTR_ALIAS(urcu_stringify(urcu_unregister_rculfhash_atfork))
+__attribute__((noreturn))
 void alias_urcu_unregister_rculfhash_atfork();
This page took 0.030331 seconds and 4 git commands to generate.