From 804b437535f97b519db3e8715807950f5818cf27 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Sat, 19 Sep 2009 22:20:16 -0400
Subject: [PATCH] generic urcu deferral (call_rcu())

Found out a way to encode the queues so the standard scenario is to use
a single pointer per call_rcu(). Uses more space for:
- unaligned function pointers.
- unaligned data pointers.
- function/data value : -2L. (this is arbitrary)

Signed-off-by: Mathieu Desnoyers
---
 tests/test_urcu_defer.c |   19 ++++++++++-
 urcu-defer-static.h     |   72 ++++++++++++++++++++++++++++++++++++++---
 urcu-defer.c            |   29 ++++++++++++++---
 urcu-defer.h            |    5 ++-
 4 files changed, 113 insertions(+), 12 deletions(-)

diff --git a/tests/test_urcu_defer.c b/tests/test_urcu_defer.c
index e56dffe..233b384 100644
--- a/tests/test_urcu_defer.c
+++ b/tests/test_urcu_defer.c
@@ -209,6 +209,14 @@ void *thr_reader(void *_count)
 
 }
 
+static void test_cb2(void *data)
+{
+}
+
+static void test_cb1(void *data)
+{
+}
+
 void *thr_writer(void *data)
 {
         unsigned long wtidx = (unsigned long)data;
@@ -230,7 +238,16 @@ void *thr_writer(void *data)
                 new = malloc(sizeof(*new));
                 new->a = 8;
                 old = rcu_xchg_pointer(&test_rcu_pointer, new);
-                rcu_defer_queue(old);
+                call_rcu(free, old);
+#if 0
+                call_rcu(test_cb1, old);
+                call_rcu(test_cb1, (void *)-2L);
+                call_rcu(test_cb1, (void *)-2L);
+                call_rcu(test_cb1, old);
+                call_rcu(test_cb2, (void *)-2L);
+#endif //0
+                call_rcu(test_cb2, (void *)-4L);
+                //call_rcu(test_cb2, (void *)-2L);
                 nr_writes++;
                 if (unlikely(!test_duration_write()))
                         break;
diff --git a/urcu-defer-static.h b/urcu-defer-static.h
index 2513769..7d0ed52 100644
--- a/urcu-defer-static.h
+++ b/urcu-defer-static.h
@@ -42,6 +42,23 @@
 #define DEFER_QUEUE_SIZE        (1 << 12)
 #define DEFER_QUEUE_MASK        (DEFER_QUEUE_SIZE - 1)
 
+/*
+ * Typically, data is aligned at least on the architecture size.
+ * Use lowest bit to indicate that the current callback is changing.
+ * Assumes that (void *)-2L is not used often. Used to encode non-aligned
+ * functions and non-aligned data using extra space.
+ * We encode the (void *)-2L fct as: -2L, fct, data.
+ * We encode the (void *)-2L data as: -2L, fct, data.
+ * Here, DQ_FCT_MARK == ~DQ_FCT_BIT. Required for the test order.
+ */
+#define DQ_FCT_BIT              (1 << 0)
+#define DQ_IS_FCT_BIT(x)        ((unsigned long)(x) & DQ_FCT_BIT)
+#define DQ_SET_FCT_BIT(x)       \
+        (x = (void *)((unsigned long)(x) | DQ_FCT_BIT))
+#define DQ_CLEAR_FCT_BIT(x)     \
+        (x = (void *)((unsigned long)(x) & ~DQ_FCT_BIT))
+#define DQ_FCT_MARK             ((void *)(~DQ_FCT_BIT))
+
 /*
  * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
  */
@@ -86,9 +103,22 @@
 #define rcu_assert(args...)
 #endif
 
+/*
+ * defer queue.
+ * Contains pointers. Encoded to save space when the same callback is
+ * often used.
+ * When looking up the next item:
+ * - if DQ_FCT_BIT is set, set the current callback to DQ_CLEAR_FCT_BIT(ptr)
+ *   - next element contains pointer to data.
+ * - else if item == DQ_FCT_MARK
+ *   - set the current callback to next element ptr
+ *   - following next element contains pointer to data.
+ * - else current element contains data
+ */
 struct defer_queue {
         unsigned long head;     /* add element at head */
+        void *last_fct_in;      /* last fct pointer encoded */
         unsigned long tail;     /* next element to remove at tail */
+        void *last_fct_out;     /* last fct pointer decoded */
         void **q;
 };
 
@@ -99,7 +129,7 @@ extern void rcu_defer_barrier_thread(void);
 /*
  * not signal-safe.
  */
-static inline void _rcu_defer_queue(void *p)
+static inline void _rcu_defer_queue(void (*fct)(void *p), void *p)
 {
         unsigned long head, tail;
 
@@ -112,17 +142,49 @@ static inline void _rcu_defer_queue(void *p)
 
         /*
          * If queue is full, empty it ourself.
+         * Worst-case: must allow 2 supplementary entries for fct pointer.
          */
-        if (unlikely(head - tail >= DEFER_QUEUE_SIZE)) {
-                assert(head - tail == DEFER_QUEUE_SIZE);
+        if (unlikely(head - tail >= DEFER_QUEUE_SIZE - 2)) {
+                assert(head - tail <= DEFER_QUEUE_SIZE);
                 rcu_defer_barrier_thread();
                 assert(head - LOAD_SHARED(defer_queue.tail) == 0);
         }
 
         smp_wmb();      /* Publish new pointer before write q[] */
-        _STORE_SHARED(defer_queue.q[head & DEFER_QUEUE_MASK], p);
+        if (unlikely(defer_queue.last_fct_in != fct)) {
+                //printf("fct diff %p %p\n", defer_queue.last_fct, fct);
+                defer_queue.last_fct_in = fct;
+                if (unlikely(DQ_IS_FCT_BIT(fct) || fct == DQ_FCT_MARK)) {
+                        /*
+                         * If the function to encode is not aligned or the
+                         * marker, write DQ_FCT_MARK followed by the function
+                         * pointer.
+                         */
+                        _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                      DQ_FCT_MARK);
+                        _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                      fct);
+                } else {
+                        DQ_SET_FCT_BIT(fct);
+                        _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                      fct);
+                }
+        } else {
+                //printf("fct same %p\n", fct);
+                if (unlikely(DQ_IS_FCT_BIT(p) || p == DQ_FCT_MARK)) {
+                        /*
+                         * If the data to encode is not aligned or the marker,
+                         * write DQ_FCT_MARK followed by the function pointer.
+                         */
+                        _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                      DQ_FCT_MARK);
+                        _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK],
+                                      fct);
+                }
+        }
+        _STORE_SHARED(defer_queue.q[head++ & DEFER_QUEUE_MASK], p);
         smp_wmb();      /* Write q[] before head. */
-        STORE_SHARED(defer_queue.head, head + 1);
+        STORE_SHARED(defer_queue.head, head);
 }
 
 #endif /* _URCU_DEFER_STATIC_H */
diff --git a/urcu-defer.c b/urcu-defer.c
index 1e5d2b6..00dd3c6 100644
--- a/urcu-defer.c
+++ b/urcu-defer.c
@@ -101,18 +101,35 @@ static void internal_urcu_unlock(pthread_mutex_t *mutex)
  * Must be called after Q.S. is reached.
  */
 static void rcu_defer_barrier_queue(struct defer_queue *queue,
-                unsigned long head)
+                                    unsigned long head)
 {
         unsigned long i;
+        void (*fct)(void *p);
+        void *p;
 
         /*
          * Tail is only modified when lock is held.
          * Head is only modified by owner thread.
          */
 
-        for (i = queue->tail; i != head; i++) {
+        for (i = queue->tail; i != head;) {
                 smp_rmb();       /* read head before q[]. */
-                free(LOAD_SHARED(queue->q[i & DEFER_QUEUE_MASK]));
+                p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+                if (unlikely(DQ_IS_FCT_BIT(p))) {
+                        //printf("%lu fct bit %p\n", i-1, p);
+                        DQ_CLEAR_FCT_BIT(p);
+                        queue->last_fct_out = p;
+                        p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+                } else if (unlikely(p == DQ_FCT_MARK)) {
+                        //printf("%lu fct mark %p\n", i-1, p);
+                        p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+                        queue->last_fct_out = p;
+                        p = LOAD_SHARED(queue->q[i++ & DEFER_QUEUE_MASK]);
+                }// else
+                        //printf("%lu data %p\n", i-1, p);
+                fct = queue->last_fct_out;
+                //printf("tid %lu %lu last_fct %p data %p\n", pthread_self(), i-1, fct, p);
+                fct(p);
         }
         smp_mb();       /* push tail after having used q[] */
         STORE_SHARED(queue->tail, i);
@@ -168,9 +185,9 @@ void *thr_defer(void *args)
  * library wrappers to be used by non-LGPL compatible source code.
  */
 
-void rcu_defer_queue(void *p)
+void rcu_defer_queue(void (*fct)(void *p), void *p)
 {
-        _rcu_defer_queue(p);
+        _rcu_defer_queue(fct, p);
 }
 
 static void rcu_add_deferer(pthread_t id)
 {
@@ -195,6 +212,7 @@ static void rcu_add_deferer(pthread_t id)
         registry[num_deferers].tid = id;
         /* reference to the TLS of _this_ deferer thread. */
         registry[num_deferers].defer_queue = &defer_queue;
+        registry[num_deferers].last_head = 0;
         num_deferers++;
 }
 
@@ -213,6 +231,7 @@ static void rcu_remove_deferer(pthread_t id)
                                 sizeof(struct deferer_registry));
                         registry[num_deferers - 1].tid = 0;
                         registry[num_deferers - 1].defer_queue = NULL;
+                        registry[num_deferers - 1].last_head = 0;
                         num_deferers--;
                         return;
                 }
diff --git a/urcu-defer.h b/urcu-defer.h
index 456b8b7..3e04c00 100644
--- a/urcu-defer.h
+++ b/urcu-defer.h
@@ -57,10 +57,13 @@
  * library wrappers to be used by non-LGPL compatible source code.
  */
 
-extern void rcu_defer_queue(void *p);
+extern void rcu_defer_queue(void (*fct)(void *p), void *p);
 
 #endif /* !_LGPL_SOURCE */
 
+#define call_rcu                rcu_defer_queue
+#define rcu_reclaim_queue(p)    rcu_defer_queue(free, p)
+
 /*
  * Thread registration for reclamation.
  */
-- 
2.34.1
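
Note (not part of the patch): the word encoding introduced above can be hard to
follow from the diff alone. Below is a minimal, single-threaded sketch of the
same scheme, assuming a plain array in place of the per-thread ring buffer and
omitting memory barriers, wrap-around and the full-queue flush; the enqueue(),
flush() and print_cb() names are illustrative only and do not exist in the
patched code.

/*
 * Minimal single-threaded sketch of the call_rcu() queue word encoding.
 * NOT the patched implementation: no barriers, no ring buffer, no
 * per-thread state; helper names are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

#define DQ_FCT_BIT              (1 << 0)
#define DQ_IS_FCT_BIT(x)        ((unsigned long)(x) & DQ_FCT_BIT)
#define DQ_SET_FCT_BIT(x)       (x = (void *)((unsigned long)(x) | DQ_FCT_BIT))
#define DQ_CLEAR_FCT_BIT(x)     (x = (void *)((unsigned long)(x) & ~DQ_FCT_BIT))
#define DQ_FCT_MARK             ((void *)(~DQ_FCT_BIT))

static void *q[64];
static unsigned long head;      /* enqueue position */
static void *last_fct_in;       /* last callback encoded on the enqueue side */
static unsigned long tail;      /* dequeue position */
static void *last_fct_out;      /* last callback decoded on the dequeue side */

/* Encode one (fct, p) pair; a single word when fct repeats and p is aligned. */
static void enqueue(void (*fct)(void *), void *p)
{
        if (last_fct_in != (void *)fct) {
                last_fct_in = (void *)fct;
                if (DQ_IS_FCT_BIT((void *)fct) || (void *)fct == DQ_FCT_MARK) {
                        /* Unaligned callback: marker word, then raw fct. */
                        q[head++] = DQ_FCT_MARK;
                        q[head++] = (void *)fct;
                } else {
                        /* Aligned callback: tag its low bit. */
                        void *f = (void *)fct;

                        DQ_SET_FCT_BIT(f);
                        q[head++] = f;
                }
        } else if (DQ_IS_FCT_BIT(p) || p == DQ_FCT_MARK) {
                /* Data that could be mistaken for a tag: re-emit fct first. */
                q[head++] = DQ_FCT_MARK;
                q[head++] = (void *)fct;
        }
        q[head++] = p;          /* common case: one word for the data */
}

/* Decode and invoke every encoded (fct, p) pair. */
static void flush(void)
{
        while (tail != head) {
                void *p = q[tail++];

                if (DQ_IS_FCT_BIT(p)) {         /* tagged callback word */
                        DQ_CLEAR_FCT_BIT(p);
                        last_fct_out = p;
                        p = q[tail++];
                } else if (p == DQ_FCT_MARK) {  /* marker, raw fct, then data */
                        last_fct_out = q[tail++];
                        p = q[tail++];
                }
                ((void (*)(void *))last_fct_out)(p);
        }
}

static void print_cb(void *p)
{
        printf("print_cb(%p)\n", p);
}

int main(void)
{
        static int a, b;

        enqueue(print_cb, &a);          /* 2 words: new callback + data */
        enqueue(print_cb, &b);          /* 1 word: same callback, data only */
        enqueue(print_cb, (void *)-2L); /* 3 words: data collides with marker */
        enqueue(free, malloc(8));       /* 2 words: callback changed */
        flush();
        return 0;
}

Design note, grounded in the patch: when successive call_rcu() invocations
reuse the same callback, each call costs a single queue word; a callback
change costs two words, and the unaligned/-2L cases cost three, which is why
the full-queue check in _rcu_defer_queue() keeps two spare slots
(DEFER_QUEUE_SIZE - 2).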