From 4afee0a75aa60a2640afc9ebccc417c3179b276e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 18 Aug 2010 10:18:58 -0400 Subject: [PATCH] Wait-free queue: wait-free enqueue, blocking dequeue without RCU dependency Implementation better suited for supporting call_rcu(). Signed-off-by: Mathieu Desnoyers --- tests/Makefile.am | 2 +- tests/test_urcu_wfq.c | 50 ++++---------- urcu/rcuwfqueue.h | 139 ------------------------------------- urcu/wfqueue.h | 156 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 169 insertions(+), 178 deletions(-) delete mode 100644 urcu/rcuwfqueue.h create mode 100644 urcu/wfqueue.h diff --git a/tests/Makefile.am b/tests/Makefile.am index a009ced..b7b9ebe 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -153,7 +153,7 @@ test_urcu_bp_dynamic_link_SOURCES = test_urcu_bp.c $(URCU_BP) test_urcu_bp_dynamic_link_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS) test_urcu_lfq_SOURCES = test_urcu_lfq.c $(URCU_DEFER) -test_urcu_wfq_SOURCES = test_urcu_wfq.c $(URCU_DEFER) +test_urcu_wfq_SOURCES = test_urcu_wfq.c test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU_DEFER) test_urcu_wfs_SOURCES = test_urcu_wfs.c $(URCU_DEFER) diff --git a/tests/test_urcu_wfq.c b/tests/test_urcu_wfq.c index 2399c7a..b0629d2 100644 --- a/tests/test_urcu_wfq.c +++ b/tests/test_urcu_wfq.c @@ -62,8 +62,7 @@ static inline pid_t gettid(void) #define _LGPL_SOURCE #endif #include -#include -#include +#include static volatile int test_go, test_stop; @@ -154,7 +153,7 @@ static unsigned long long __thread nr_successful_enqueues; static unsigned int nr_enqueuers; static unsigned int nr_dequeuers; -static struct rcu_wfq_queue q; +static struct wfq_queue q; void *thr_enqueuer(void *_count) { @@ -165,19 +164,17 @@ void *thr_enqueuer(void *_count) set_affinity(); - rcu_register_thread(); - while (!test_go) { } smp_mb(); for (;;) { - struct rcu_wfq_node *node = malloc(sizeof(*node)); + struct wfq_node *node = malloc(sizeof(*node)); if (!node) goto fail; - rcu_wfq_node_init(node); - rcu_wfq_enqueue(&q, node); + wfq_node_init(node); + wfq_enqueue(&q, node); nr_successful_enqueues++; if (unlikely(wdelay)) @@ -188,8 +185,6 @@ fail: break; } - rcu_unregister_thread(); - count[0] = nr_enqueues; count[1] = nr_successful_enqueues; printf_verbose("enqueuer thread_end, thread id : %lx, tid %lu, " @@ -200,14 +195,6 @@ fail: } -static void rcu_release_node(struct urcu_ref *ref) -{ - struct rcu_wfq_node *node = container_of(ref, struct rcu_wfq_node, ref); - defer_rcu(free, node); - //synchronize_rcu(); - //free(node); -} - void *thr_dequeuer(void *_count) { unsigned long long *count = _count; @@ -217,20 +204,16 @@ void *thr_dequeuer(void *_count) set_affinity(); - rcu_defer_register_thread(); - rcu_register_thread(); - while (!test_go) { } smp_mb(); for (;;) { - struct rcu_wfq_node *node = - rcu_wfq_dequeue_blocking(&q, rcu_release_node); + struct wfq_node *node = wfq_dequeue_blocking(&q); if (node) { - urcu_ref_put(&node->ref, rcu_release_node); + free(node); nr_successful_dequeues++; } @@ -241,9 +224,6 @@ void *thr_dequeuer(void *_count) loop_sleep(rduration); } - rcu_unregister_thread(); - rcu_defer_unregister_thread(); - printf_verbose("dequeuer thread_end, thread id : %lx, tid %lu, " "dequeues %llu, successful_dequeues %llu\n", pthread_self(), (unsigned long)gettid(), nr_dequeues, @@ -253,20 +233,14 @@ void *thr_dequeuer(void *_count) return ((void*)2); } -static void release_node(struct urcu_ref *ref) -{ - struct rcu_wfq_node *node = container_of(ref, struct rcu_wfq_node, ref); - free(node); -} - -void test_end(struct rcu_wfq_queue *q, unsigned long long *nr_dequeues) +void test_end(struct wfq_queue *q, unsigned long long *nr_dequeues) { - struct rcu_wfq_node *node; + struct wfq_node *node; do { - node = rcu_wfq_dequeue_blocking(q, release_node); + node = wfq_dequeue_blocking(q); if (node) { - urcu_ref_put(&node->ref, release_node); + free(node); (*nr_dequeues)++; } } while (node); @@ -363,7 +337,7 @@ int main(int argc, char **argv) tid_dequeuer = malloc(sizeof(*tid_dequeuer) * nr_dequeuers); count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers); count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers); - rcu_wfq_init(&q); + wfq_init(&q); next_aff = 0; diff --git a/urcu/rcuwfqueue.h b/urcu/rcuwfqueue.h deleted file mode 100644 index 8c1d4a1..0000000 --- a/urcu/rcuwfqueue.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef _URCU_RCUWFQUEUE_H -#define _URCU_RCUWFQUEUE_H - -/* - * rcuwfqueue.h - * - * Userspace RCU library - RCU Queue with Wait-Free Enqueue/Blocking Dequeue - * - * Copyright 2010 - Mathieu Desnoyers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if (!defined(_GNU_SOURCE) && !defined(_LGPL_SOURCE)) -#error "Dynamic loader LGPL wrappers not implemented yet" -#endif - -/* - * RCU queue with wait-free enqueue/blocking dequeue using reference counting. - * Enqueue and dequeue operations hold a RCU read lock to deal with cmpxchg ABA - * problem. This implementation keeps a dummy head node to ensure we can always - * update the queue locklessly. Given that this is a queue, the dummy head node - * must always advance as we dequeue entries. Therefore, we keep a reference - * count on each entry we are dequeueing, so they can be kept as dummy head node - * until the next dequeue, at which point their reference count will be - * decremented. - */ - -#define URCU_WFQ_PERMANENT_REF 128 - -struct rcu_wfq_node { - struct rcu_wfq_node *next; - struct urcu_ref ref; -}; - -struct rcu_wfq_queue { - struct rcu_wfq_node *head, *tail; - struct rcu_wfq_node init; /* Dummy initialization node */ -}; - -void rcu_wfq_node_init(struct rcu_wfq_node *node) -{ - node->next = NULL; - urcu_ref_init(&node->ref); -} - -void rcu_wfq_init(struct rcu_wfq_queue *q) -{ - rcu_wfq_node_init(&q->init); - /* Make sure the initial node is never freed. */ - urcu_ref_set(&q->init.ref, URCU_WFQ_PERMANENT_REF); - /* Set queue end */ - q->head = q->tail = &q->init; -} - -void rcu_wfq_enqueue(struct rcu_wfq_queue *q, struct rcu_wfq_node *node) -{ - struct rcu_wfq_node *old_tail; - - urcu_ref_get(&node->ref); - /* - * uatomic_xchg() implicit memory barrier orders earlier stores to node - * (setting it to NULL and incrementing the refcount) before - * publication. - */ - old_tail = uatomic_xchg(&q->tail, node); - /* - * At this point, dequeuers see a NULL old_tail->next, which indicates - * end of queue. The following store will append "node" to the queue - * from a dequeuer perspective. - */ - STORE_SHARED(old_tail->next, node); -} - -/* - * The entry returned by dequeue must be taken care of by doing a urcu_ref_put, - * which calls the release primitive when the reference count drops to zero. A - * grace period must be waited before performing the actual memory reclamation - * in the release primitive. The wfq node returned by dequeue must not be - * modified/re-used/freed until the reference count reaches zero and a grace - * period has elapsed (after the refcount reached 0). - * - * No need to go on a waitqueue here, as there is no possible state in which the - * list could cause dequeue to busy-loop needlessly while waiting for another - * thread to be scheduled. The queue appears empty until tail->next is set by - * enqueue. - */ -struct rcu_wfq_node * -rcu_wfq_dequeue_blocking(struct rcu_wfq_queue *q, - void (*release)(struct urcu_ref *)) -{ - for (;;) { - struct rcu_wfq_node *head, *next; - - rcu_read_lock(); - head = rcu_dereference(q->head); - next = rcu_dereference(head->next); - if (next) { - if (uatomic_cmpxchg(&q->head, head, next) == head) { - rcu_read_unlock(); - urcu_ref_put(&head->ref, release); - return next; - } else { - /* Concurrently pushed, retry */ - rcu_read_unlock(); - continue; - } - } else { - /* Empty */ - rcu_read_unlock(); - return NULL; - } - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* _URCU_RCUWFQUEUE_H */ diff --git a/urcu/wfqueue.h b/urcu/wfqueue.h new file mode 100644 index 0000000..b4208da --- /dev/null +++ b/urcu/wfqueue.h @@ -0,0 +1,156 @@ +#ifndef _URCU_WFQUEUE_H +#define _URCU_WFQUEUE_H + +/* + * wfqueue.h + * + * Userspace RCU library - Queue with Wait-Free Enqueue/Blocking Dequeue + * + * Copyright 2010 - Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if (!defined(_GNU_SOURCE) && !defined(_LGPL_SOURCE)) +#error "Dynamic loader LGPL wrappers not implemented yet" +#endif + +/* + * Queue with wait-free enqueue/blocking dequeue. + * This implementation adds a dummy head node when the queue is empty to ensure + * we can always update the queue locklessly. + * + * Inspired from half-wait-free/half-blocking queue implementation done by + * Paul E. McKenney. + */ + +struct wfq_node { + struct wfq_node *next; +}; + +struct wfq_queue { + struct wfq_node *head, **tail; + struct wfq_node dummy; /* Dummy node */ + pthread_mutex_t lock; +}; + +void wfq_node_init(struct wfq_node *node) +{ + node->next = NULL; +} + +void wfq_init(struct wfq_queue *q) +{ + int ret; + + wfq_node_init(&q->dummy); + /* Set queue head and tail */ + q->head = &q->dummy; + q->tail = &q->dummy.next; + ret = pthread_mutex_init(&q->lock, NULL); + assert(!ret); +} + +void wfq_enqueue(struct wfq_queue *q, struct wfq_node *node) +{ + struct wfq_node **old_tail; + + /* + * uatomic_xchg() implicit memory barrier orders earlier stores to data + * structure containing node and setting node->next to NULL before + * publication. + */ + old_tail = uatomic_xchg(&q->tail, node); + /* + * At this point, dequeuers see a NULL old_tail->next, which indicates + * that the queue is being appended to. The following store will append + * "node" to the queue from a dequeuer perspective. + */ + STORE_SHARED(*old_tail, node); +} + +/* + * It is valid to reuse and free a dequeued node immediately. + * + * No need to go on a waitqueue here, as there is no possible state in which the + * list could cause dequeue to busy-loop needlessly while waiting for another + * thread to be scheduled. The queue appears empty until tail->next is set by + * enqueue. + */ +struct wfq_node * +__wfq_dequeue_blocking(struct wfq_queue *q) +{ + struct wfq_node *node, *next; + int busy_wait = 16; + + /* + * Queue is empty if it only contains the dummy node. + */ + if (q->head == &q->dummy && LOAD_SHARED(q->tail) == &q->dummy.next) + return NULL; + node = q->head; + + /* + * Adaptative busy-looping waiting for enqueuer to complete enqueue. + */ + while ((next = LOAD_SHARED(node->next)) == NULL) { + if (busy_wait > 0) { + cpu_relax(); + busy_wait--; + } else + poll(NULL, 0, 1); /* Wait for 1ms */ + } + /* + * Move queue head forward. + */ + q->head = next; + /* + * Requeue dummy node if we just dequeued it. + */ + if (node == &q->dummy) { + wfq_node_init(node); + wfq_enqueue(q, node); + return __wfq_dequeue_blocking(q); + } + return node; +} + +struct wfq_node * +wfq_dequeue_blocking(struct wfq_queue *q) +{ + struct wfq_node *retnode; + int ret; + + ret = pthread_mutex_lock(&q->lock); + assert(!ret); + retnode = __wfq_dequeue_blocking(q); + ret = pthread_mutex_unlock(&q->lock); + assert(!ret); + return retnode; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _URCU_WFQUEUE_H */ -- 2.34.1