Fix: auto-resize hash table destroy deadlock
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Fri, 3 Feb 2023 19:11:50 +0000 (14:11 -0500)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Fri, 10 Feb 2023 17:05:59 +0000 (12:05 -0500)
Fix a deadlock for auto-resize hash tables when cds_lfht_destroy
is called with RCU read-side lock held.

Example stack track of a hang:

  Thread 2 (Thread 0x7f21ba876700 (LWP 26114)):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x00007f21beba7aa0 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x7f21bedac308 <urcu_memb_gp+8>) at ../include/urcu/futex.h:81
  #2  futex_noasync (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x7f21bedac308 <urcu_memb_gp+8>) at ../include/urcu/futex.h:90
  #3  wait_gp () at urcu.c:265
  #4  wait_for_readers (input_readers=input_readers@entry=0x7f21ba8751b0, cur_snap_readers=cur_snap_readers@entry=0x0,
      qsreaders=qsreaders@entry=0x7f21ba8751c0) at urcu.c:357
  #5  0x00007f21beba8339 in urcu_memb_synchronize_rcu () at urcu.c:498
  #6  0x00007f21be99f93f in fini_table (last_order=<optimized out>, first_order=13, ht=0x5651cec75400) at rculfhash.c:1489
  #7  _do_cds_lfht_shrink (new_size=<optimized out>, old_size=<optimized out>, ht=0x5651cec75400) at rculfhash.c:2001
  #8  _do_cds_lfht_resize (ht=ht@entry=0x5651cec75400) at rculfhash.c:2023
  #9  0x00007f21be99fa26 in do_resize_cb (work=0x5651e20621a0) at rculfhash.c:2063
  #10 0x00007f21be99dbfd in workqueue_thread (arg=0x5651cec74a00) at workqueue.c:234
  #11 0x00007f21bd7c06db in start_thread (arg=0x7f21ba876700) at pthread_create.c:463
  #12 0x00007f21bd4e961f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

  Thread 1 (Thread 0x7f21bf285300 (LWP 26098)):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x00007f21be99d8b7 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:81
  #2  futex_async (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:113
  #3  futex_wait (futex=futex@entry=0x5651d8b38584) at workqueue.c:135
  #4  0x00007f21be99e2c8 in urcu_workqueue_wait_completion (completion=completion@entry=0x5651d8b38580) at workqueue.c:423
  #5  0x00007f21be99e3f9 in urcu_workqueue_flush_queued_work (workqueue=0x5651cec74a00) at workqueue.c:452
  #6  0x00007f21be9a0c83 in cds_lfht_destroy (ht=0x5651d8b2fcf0, attr=attr@entry=0x0) at rculfhash.c:1906

This deadlock is easy to reproduce when rapidly adding a large number of
entries in the cds_lfht, removing them, and calling cds_lfht_destroy().

The deadlock will occur if the call to cds_lfht_destroy() takes place
while a resize of the hash table is ongoing.

Fix this by moving the teardown of the lfht worker thread to libcds
library destructor, so it does not have to wait on synchronize_rcu from
a resize callback from within a read-side critical section. As a
consequence, the atfork callbacks are left registered within each urcu
flavor for which a resizeable hash table is created until the end of the
executable lifetime.

The other part of the fix is to move the hash table destruction to the
worker thread for auto-resize hash tables. This prevents having to wait
for resize callbacks from RCU read-side critical section. This is
guaranteed by the fact that the worker thread serializes previously
queued resize callbacks before the destroy callback.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: If8b1c3c8063dc7b9846dc5c3fc452efd917eab4d

include/urcu/rculfhash.h
src/rculfhash-internal.h
src/rculfhash.c
src/urcu-call-rcu-impl.h

index 8586e1da371f86e2c19518f60a8cef69e6db691e..fbd33cc3ff346c704ae264d724563977a5ff473f 100644 (file)
@@ -247,16 +247,16 @@ struct cds_lfht *cds_lfht_new(unsigned long init_size,
  *
  * Return 0 on success, negative error value on error.
 
+ * Threads calling this API need to be registered RCU read-side threads.
+ *
  * Prior to liburcu 0.10:
- * - Threads calling this API need to be registered RCU read-side
- *   threads.
  * - cds_lfht_destroy should *not* be called from a RCU read-side
  *   critical section. It should *not* be called from a call_rcu thread
  *   context neither.
  *
  * Starting from liburcu 0.10, rculfhash implements its own worker
- * thread to handle resize operations, which removes RCU requirements on
- * cds_lfht_destroy.
+ * thread to handle resize operations, which removes the above RCU
+ * read-side critical section requirement on cds_lfht_destroy.
  */
 extern
 int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr);
index e17210b8624cf7fd370ef88f591174e100085548..325e55bed31a920ae7f27ef08343707ee6379163 100644 (file)
@@ -29,6 +29,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include "workqueue.h"
+
 #ifdef DEBUG
 #define dbg_printf(fmt, args...)     printf("[debug rculfhash] " fmt, ## args)
 #else
@@ -82,11 +84,13 @@ struct cds_lfht {
         * therefore cause grace-period deadlock if we hold off RCU G.P.
         * completion.
         */
-       pthread_mutex_t resize_mutex;   /* resize mutex: add/del mutex */
-       pthread_attr_t *resize_attr;    /* Resize threads attributes */
+       pthread_mutex_t resize_mutex;           /* resize mutex: add/del mutex */
+       pthread_attr_t *caller_resize_attr;     /* resize threads attributes from lfht_new caller */
+       pthread_attr_t resize_attr;
        unsigned int in_progress_destroy;
        unsigned long resize_target;
        int resize_initiated;
+       struct urcu_work destroy_work;
 
        /*
         * Variables needed for add and remove fast-paths.
index 5391d28e923a0a9b44909f3b8711692f36f4edbf..37fbc1b7c44c1ef5bcd347b19b7ef3a483d6c016 100644 (file)
@@ -363,7 +363,6 @@ struct partition_resize_work {
 };
 
 static struct urcu_workqueue *cds_lfht_workqueue;
-static unsigned long cds_lfht_workqueue_user_count;
 
 /*
  * Mutex ensuring mutual exclusion between workqueue initialization and
@@ -380,8 +379,8 @@ static struct urcu_atfork cds_lfht_atfork;
  */
 static int cds_lfht_workqueue_atfork_nesting;
 
+static void __attribute__((destructor)) cds_lfht_exit(void);
 static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor);
-static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor);
 
 #ifdef CONFIG_CDS_LFHT_ITER_DEBUG
 
@@ -1286,7 +1285,8 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
                work[thread].len = partition_len;
                work[thread].start = thread * partition_len;
                work[thread].fct = fct;
-               ret = pthread_create(&(work[thread].thread_id), ht->resize_attr,
+               ret = pthread_create(&(work[thread].thread_id),
+                       ht->caller_resize_attr ? &ht->resize_attr : NULL,
                        partition_resize_thread, &work[thread]);
                if (ret == EAGAIN) {
                        /*
@@ -1643,7 +1643,9 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
 
        ht->flags = flags;
        ht->flavor = flavor;
-       ht->resize_attr = attr;
+       ht->caller_resize_attr = attr;
+       if (attr)
+               ht->resize_attr = *attr;
        alloc_split_items_count(ht);
        /* this mutex should not nest in read-side C.S. */
        pthread_mutex_init(&ht->resize_mutex, NULL);
@@ -1857,6 +1859,35 @@ int cds_lfht_is_node_deleted(const struct cds_lfht_node *node)
        return is_removed(CMM_LOAD_SHARED(node->next));
 }
 
+static
+bool cds_lfht_is_empty(struct cds_lfht *ht)
+{
+       struct cds_lfht_node *node, *next;
+       bool empty = true;
+       bool was_online;
+
+       was_online = ht->flavor->read_ongoing();
+       if (!was_online) {
+               ht->flavor->thread_online();
+               ht->flavor->read_lock();
+       }
+       /* Check that the table is empty */
+       node = bucket_at(ht, 0);
+       do {
+               next = rcu_dereference(node->next);
+               if (!is_bucket(next)) {
+                       empty = false;
+                       break;
+               }
+               node = clear_flag(next);
+       } while (!is_end(node));
+       if (!was_online) {
+               ht->flavor->read_unlock();
+               ht->flavor->thread_offline();
+       }
+       return empty;
+}
+
 static
 int cds_lfht_delete_bucket(struct cds_lfht *ht)
 {
@@ -1891,6 +1922,24 @@ int cds_lfht_delete_bucket(struct cds_lfht *ht)
        return 0;
 }
 
+static
+void do_auto_resize_destroy_cb(struct urcu_work *work)
+{
+       struct cds_lfht *ht = caa_container_of(work, struct cds_lfht, destroy_work);
+       int ret;
+
+       ht->flavor->register_thread();
+       ret = cds_lfht_delete_bucket(ht);
+       if (ret)
+               urcu_die(ret);
+       free_split_items_count(ht);
+       ret = pthread_mutex_destroy(&ht->resize_mutex);
+       if (ret)
+               urcu_die(ret);
+       ht->flavor->unregister_thread();
+       poison_free(ht);
+}
+
 /*
  * Should only be called when no more concurrent readers nor writers can
  * possibly access the table.
@@ -1900,22 +1949,38 @@ int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr)
        int ret;
 
        if (ht->flags & CDS_LFHT_AUTO_RESIZE) {
+               /*
+                * Perform error-checking for emptiness before queuing
+                * work, so we can return error to the caller. This runs
+                * concurrently with ongoing resize.
+                */
+               if (!cds_lfht_is_empty(ht))
+                       return -EPERM;
                /* Cancel ongoing resize operations. */
                _CMM_STORE_SHARED(ht->in_progress_destroy, 1);
-               /* Wait for in-flight resize operations to complete */
-               urcu_workqueue_flush_queued_work(cds_lfht_workqueue);
+               if (attr) {
+                       *attr = ht->caller_resize_attr;
+                       ht->caller_resize_attr = NULL;
+               }
+               /*
+                * Queue destroy work after prior queued resize
+                * operations. Given there are no concurrent writers
+                * accessing the hash table at this point, no resize
+                * operations can be queued after this destroy work.
+                */
+               urcu_workqueue_queue_work(cds_lfht_workqueue,
+                       &ht->destroy_work, do_auto_resize_destroy_cb);
+               return 0;
        }
        ret = cds_lfht_delete_bucket(ht);
        if (ret)
                return ret;
        free_split_items_count(ht);
        if (attr)
-               *attr = ht->resize_attr;
+               *attr = ht->caller_resize_attr;
        ret = pthread_mutex_destroy(&ht->resize_mutex);
        if (ret)
                ret = -EBUSY;
-       if (ht->flags & CDS_LFHT_AUTO_RESIZE)
-               cds_lfht_fini_worker(ht->flavor);
        poison_free(ht);
        return ret;
 }
@@ -2179,23 +2244,19 @@ static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor)
        flavor->register_rculfhash_atfork(&cds_lfht_atfork);
 
        mutex_lock(&cds_lfht_fork_mutex);
-       if (cds_lfht_workqueue_user_count++)
-               goto end;
-       cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-end:
+       if (!cds_lfht_workqueue)
+               cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL,
+                       NULL, NULL, NULL, NULL, NULL, NULL, NULL);
        mutex_unlock(&cds_lfht_fork_mutex);
 }
 
-static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor)
+static void cds_lfht_exit(void)
 {
        mutex_lock(&cds_lfht_fork_mutex);
-       if (--cds_lfht_workqueue_user_count)
-               goto end;
-       urcu_workqueue_destroy(cds_lfht_workqueue);
-       cds_lfht_workqueue = NULL;
-end:
+       if (cds_lfht_workqueue) {
+               urcu_workqueue_flush_queued_work(cds_lfht_workqueue);
+               urcu_workqueue_destroy(cds_lfht_workqueue);
+               cds_lfht_workqueue = NULL;
+       }
        mutex_unlock(&cds_lfht_fork_mutex);
-
-       flavor->unregister_rculfhash_atfork(&cds_lfht_atfork);
 }
index 9f85d55b403c5fa1f882947af021c6cae460d712..1aba04a9a15e320fd9ae9a5ab2387fca643439db 100644 (file)
@@ -102,7 +102,6 @@ static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER;
 static struct call_rcu_data *default_call_rcu_data;
 
 static struct urcu_atfork *registered_rculfhash_atfork;
-static unsigned long registered_rculfhash_atfork_refcount;
 
 /*
  * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are
@@ -1028,20 +1027,20 @@ void call_rcu_after_fork_child(void)
 
 void urcu_register_rculfhash_atfork(struct urcu_atfork *atfork)
 {
+       if (CMM_LOAD_SHARED(registered_rculfhash_atfork))
+               return;
        call_rcu_lock(&call_rcu_mutex);
-       if (registered_rculfhash_atfork_refcount++)
-               goto end;
-       registered_rculfhash_atfork = atfork;
-end:
+       if (!registered_rculfhash_atfork)
+               registered_rculfhash_atfork = atfork;
        call_rcu_unlock(&call_rcu_mutex);
 }
 
+/*
+ * This unregistration function is deprecated, meant only for internal
+ * use by rculfhash.
+ */
+__attribute__((noreturn))
 void urcu_unregister_rculfhash_atfork(struct urcu_atfork *atfork __attribute__((unused)))
 {
-       call_rcu_lock(&call_rcu_mutex);
-       if (--registered_rculfhash_atfork_refcount)
-               goto end;
-       registered_rculfhash_atfork = NULL;
-end:
-       call_rcu_unlock(&call_rcu_mutex);
+       urcu_die(EPERM);
 }
This page took 0.030648 seconds and 4 git commands to generate.