From: Mathieu Desnoyers Date: Tue, 13 Sep 2011 21:34:19 +0000 (-0400) Subject: Merge branch 'master' into urcu/ht-shrink-help X-Git-Tag: v0.7.0~43^2~153 X-Git-Url: http://git.liburcu.org/?a=commitdiff_plain;h=bce63dfd0a2306452c9e39f5df01789e77f3f44a;hp=4105056a2fa97794eb32bbf512d2795406071c9c;p=userspace-rcu.git Merge branch 'master' into urcu/ht-shrink-help Conflicts: urcu-qsbr.c Signed-off-by: Mathieu Desnoyers --- diff --git a/tests/Makefile.am b/tests/Makefile.am index 9e5940f..7c638ed 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -165,8 +165,8 @@ test_urcu_wfq_dynlink_SOURCES = test_urcu_wfq.c test_urcu_wfq_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS) test_urcu_wfq_dynlink_LDADD = $(URCU_COMMON_LIB) -test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU_CDS_LIB) $(URCU_DEFER) -test_urcu_lfs_dynlink_SOURCES = test_urcu_lfs.c $(URCU_DEFER) +test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU) $(URCU_CDS_LIB) +test_urcu_lfs_dynlink_SOURCES = test_urcu_lfs.c $(URCU) test_urcu_lfs_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS) test_urcu_lfs_dynlink_LDADD = $(URCU_CDS_LIB) diff --git a/tests/test_urcu_lfq.c b/tests/test_urcu_lfq.c index b61a7d4..5292ebd 100644 --- a/tests/test_urcu_lfq.c +++ b/tests/test_urcu_lfq.c @@ -157,6 +157,11 @@ static unsigned long long __thread nr_successful_enqueues; static unsigned int nr_enqueuers; static unsigned int nr_dequeuers; +struct test { + struct cds_lfq_node_rcu list; + struct rcu_head rcu; +}; + static struct cds_lfq_queue_rcu q; void *thr_enqueuer(void *_count) @@ -176,12 +181,12 @@ void *thr_enqueuer(void *_count) cmm_smp_mb(); for (;;) { - struct cds_lfq_node_rcu *node = malloc(sizeof(*node)); + struct test *node = malloc(sizeof(*node)); if (!node) goto fail; - cds_lfq_node_init_rcu(node); + cds_lfq_node_init_rcu(&node->list); rcu_read_lock(); - cds_lfq_enqueue_rcu(&q, node); + cds_lfq_enqueue_rcu(&q, &node->list); rcu_read_unlock(); nr_successful_enqueues++; @@ -205,6 +210,14 @@ fail: } +static +void 
free_node_cb(struct rcu_head *head) +{ + struct test *node = + caa_container_of(head, struct test, rcu); + free(node); +} + void *thr_dequeuer(void *_count) { unsigned long long *count = _count; @@ -228,14 +241,16 @@ void *thr_dequeuer(void *_count) cmm_smp_mb(); for (;;) { - struct cds_lfq_node_rcu *node; + struct cds_lfq_node_rcu *qnode; + struct test *node; rcu_read_lock(); - node = cds_lfq_dequeue_rcu(&q); + qnode = cds_lfq_dequeue_rcu(&q); + node = caa_container_of(qnode, struct test, list); rcu_read_unlock(); if (node) { - defer_rcu(free, node); + call_rcu(&node->rcu, free_node_cb); nr_successful_dequeues++; } @@ -259,17 +274,18 @@ void *thr_dequeuer(void *_count) void test_end(struct cds_lfq_queue_rcu *q, unsigned long long *nr_dequeues) { - struct cds_lfq_node_rcu *node; + struct cds_lfq_node_rcu *snode; do { - rcu_read_lock(); - node = cds_lfq_dequeue_rcu(q); - rcu_read_unlock(); - if (node) { + snode = cds_lfq_dequeue_rcu(q); + if (snode) { + struct test *node; + + node = caa_container_of(snode, struct test, list); free(node); /* no more concurrent access */ (*nr_dequeues)++; } - } while (node); + } while (snode); } void show_usage(int argc, char **argv) @@ -364,6 +380,8 @@ int main(int argc, char **argv) count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers); count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers); cds_lfq_init_rcu(&q, call_rcu); + err = create_all_cpu_call_rcu_data(0); + assert(!err); next_aff = 0; @@ -432,6 +450,7 @@ int main(int argc, char **argv) tot_successful_enqueues, tot_successful_dequeues + end_dequeues); + free_all_cpu_call_rcu_data(); free(count_enqueuer); free(count_dequeuer); free(tid_enqueuer); diff --git a/tests/test_urcu_lfs.c b/tests/test_urcu_lfs.c index 252454d..c85fa44 100644 --- a/tests/test_urcu_lfs.c +++ b/tests/test_urcu_lfs.c @@ -157,6 +157,11 @@ static unsigned long long __thread nr_successful_enqueues; static unsigned int nr_enqueuers; static unsigned int nr_dequeuers; +struct test { + 
struct cds_lfs_node_rcu list; + struct rcu_head rcu; +}; + static struct cds_lfs_stack_rcu s; void *thr_enqueuer(void *_count) @@ -176,12 +181,12 @@ void *thr_enqueuer(void *_count) cmm_smp_mb(); for (;;) { - struct cds_lfs_node_rcu *node = malloc(sizeof(*node)); + struct test *node = malloc(sizeof(*node)); if (!node) goto fail; - cds_lfs_node_init_rcu(node); + cds_lfs_node_init_rcu(&node->list); /* No rcu read-side is needed for push */ - cds_lfs_push_rcu(&s, node); + cds_lfs_push_rcu(&s, &node->list); nr_successful_enqueues++; if (unlikely(wdelay)) @@ -204,6 +209,14 @@ fail: } +static +void free_node_cb(struct rcu_head *head) +{ + struct test *node = + caa_container_of(head, struct test, rcu); + free(node); +} + void *thr_dequeuer(void *_count) { unsigned long long *count = _count; @@ -227,13 +240,15 @@ void *thr_dequeuer(void *_count) cmm_smp_mb(); for (;;) { - struct cds_lfs_node_rcu *node; + struct cds_lfs_node_rcu *snode; + struct test *node; rcu_read_lock(); - node = cds_lfs_pop_rcu(&s); + snode = cds_lfs_pop_rcu(&s); + node = caa_container_of(snode, struct test, list); rcu_read_unlock(); if (node) { - defer_rcu(free, node); + call_rcu(&node->rcu, free_node_cb); nr_successful_dequeues++; } nr_dequeues++; @@ -257,15 +272,18 @@ void *thr_dequeuer(void *_count) void test_end(struct cds_lfs_stack_rcu *s, unsigned long long *nr_dequeues) { - struct cds_lfs_node_rcu *node; + struct cds_lfs_node_rcu *snode; do { - node = cds_lfs_pop_rcu(s); - if (node) { + snode = cds_lfs_pop_rcu(s); + if (snode) { + struct test *node; + + node = caa_container_of(snode, struct test, list); free(node); (*nr_dequeues)++; } - } while (node); + } while (snode); } void show_usage(int argc, char **argv) @@ -360,6 +378,8 @@ int main(int argc, char **argv) count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers); count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers); cds_lfs_init_rcu(&s); + err = create_all_cpu_call_rcu_data(0); + assert(!err); next_aff = 0; @@ 
-426,6 +446,7 @@ int main(int argc, char **argv) tot_successful_enqueues, tot_successful_dequeues + end_dequeues); + free_all_cpu_call_rcu_data(); free(count_enqueuer); free(count_dequeuer); free(tid_enqueuer); diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h index 38cc001..c14cc18 100644 --- a/urcu-call-rcu-impl.h +++ b/urcu-call-rcu-impl.h @@ -482,7 +482,9 @@ void set_thread_call_rcu_data(struct call_rcu_data *crdp) /* * Create a separate call_rcu thread for each CPU. This does not * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data() - * function if you want that behavior. + * function if you want that behavior. Should be paired with + * free_all_cpu_call_rcu_data() to teardown these call_rcu worker + * threads. */ int create_all_cpu_call_rcu_data(unsigned long flags) diff --git a/urcu-qsbr.c b/urcu-qsbr.c index 1dc9979..1adaa94 100644 --- a/urcu-qsbr.c +++ b/urcu-qsbr.c @@ -208,21 +208,17 @@ void synchronize_rcu(void) was_online = rcu_reader.ctr; /* All threads should read qparity before accessing data structure - * where new ptr points to. - */ - /* Write new ptr before changing the qparity */ - cmm_smp_mb(); - - /* + * where new ptr points to. In the "then" case, rcu_thread_offline + * includes a memory barrier. + * * Mark the writer thread offline to make sure we don't wait for * our own quiescent state. This allows using synchronize_rcu() * in threads registered as readers. */ - if (was_online) { - CMM_STORE_SHARED(rcu_reader.ctr, 0); - cmm_smp_mb(); /* write rcu_reader.ctr before read futex */ - wake_up_gp(); - } + if (was_online) + rcu_thread_offline(); + else + cmm_smp_mb(); mutex_lock(&rcu_gp_lock); @@ -263,9 +259,9 @@ out: * freed. */ if (was_online) - _CMM_STORE_SHARED(rcu_reader.ctr, - CMM_LOAD_SHARED(rcu_gp_ctr)); - cmm_smp_mb(); + rcu_thread_online(); + else + cmm_smp_mb(); } #else /* !(CAA_BITS_PER_LONG < 64) */ void synchronize_rcu(void) @@ -279,12 +275,10 @@ void synchronize_rcu(void) * our own quiescent state. 
This allows using synchronize_rcu() * in threads registered as readers. */ - cmm_smp_mb(); - if (was_online) { - CMM_STORE_SHARED(rcu_reader.ctr, 0); - cmm_smp_mb(); /* write rcu_reader.ctr before read futex */ - wake_up_gp(); - } + if (was_online) + rcu_thread_offline(); + else + cmm_smp_mb(); mutex_lock(&rcu_gp_lock); if (cds_list_empty(&registry)) @@ -294,9 +288,9 @@ out: mutex_unlock(&rcu_gp_lock); if (was_online) - _CMM_STORE_SHARED(rcu_reader.ctr, - CMM_LOAD_SHARED(rcu_gp_ctr)); - cmm_smp_mb(); + rcu_thread_online(); + else + cmm_smp_mb(); } #endif /* !(CAA_BITS_PER_LONG < 64) */ diff --git a/urcu/arch/generic.h b/urcu/arch/generic.h index 100d3c6..1ea7f59 100644 --- a/urcu/arch/generic.h +++ b/urcu/arch/generic.h @@ -100,22 +100,50 @@ extern "C" { #endif #ifdef CONFIG_RCU_SMP +#ifndef cmm_smp_mb #define cmm_smp_mb() cmm_mb() +#endif +#ifndef cmm_smp_rmb #define cmm_smp_rmb() cmm_rmb() +#endif +#ifndef cmm_smp_wmb #define cmm_smp_wmb() cmm_wmb() +#endif +#ifndef cmm_smp_mc #define cmm_smp_mc() cmm_mc() +#endif +#ifndef cmm_smp_rmc #define cmm_smp_rmc() cmm_rmc() +#endif +#ifndef cmm_smp_wmc #define cmm_smp_wmc() cmm_wmc() +#endif +#ifndef cmm_smp_read_barrier_depends #define cmm_smp_read_barrier_depends() cmm_read_barrier_depends() +#endif #else +#ifndef cmm_smp_mb #define cmm_smp_mb() cmm_barrier() +#endif +#ifndef cmm_smp_rmb #define cmm_smp_rmb() cmm_barrier() +#endif +#ifndef cmm_smp_wmb #define cmm_smp_wmb() cmm_barrier() +#endif +#ifndef cmm_smp_mc #define cmm_smp_mc() cmm_barrier() +#endif +#ifndef cmm_smp_rmc #define cmm_smp_rmc() cmm_barrier() +#endif +#ifndef cmm_smp_wmc #define cmm_smp_wmc() cmm_barrier() +#endif +#ifndef cmm_smp_read_barrier_depends #define cmm_smp_read_barrier_depends() #endif +#endif #ifndef caa_cpu_relax #define caa_cpu_relax() cmm_barrier() diff --git a/urcu/arch/ppc.h b/urcu/arch/ppc.h index d7317bb..a03d688 100644 --- a/urcu/arch/ppc.h +++ b/urcu/arch/ppc.h @@ -48,11 +48,24 @@ extern "C" { rval; \ }) +#define mftb() \ + ({ \ + 
unsigned long long rval; \ + asm volatile("mftb %0" : "=r" (rval)); \ + rval; \ + }) + typedef unsigned long long cycles_t; -static inline cycles_t caa_get_cycles (void) +#ifdef __powerpc64__ +static inline cycles_t caa_get_cycles(void) { - long h, l; + return (cycles_t) mftb(); +} +#else +static inline cycles_t caa_get_cycles(void) +{ + unsigned long h, l; for (;;) { h = mftbu(); @@ -63,6 +76,7 @@ static inline cycles_t caa_get_cycles (void) return (((cycles_t) h) << 32) + l; } } +#endif #ifdef __cplusplus } diff --git a/urcu/arch/x86.h b/urcu/arch/x86.h index 9e5411f..c1e2e07 100644 --- a/urcu/arch/x86.h +++ b/urcu/arch/x86.h @@ -33,12 +33,27 @@ extern "C" { #ifdef CONFIG_RCU_HAVE_FENCE #define cmm_mb() asm volatile("mfence":::"memory") -#define cmm_rmb() asm volatile("lfence":::"memory") -#define cmm_wmb() asm volatile("sfence"::: "memory") + +/* + * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when + * using SSE or working with I/O areas. cmm_smp_rmb/cmm_smp_wmb are + * only compiler barriers, which is enough for general use. + */ +#define cmm_rmb() asm volatile("lfence":::"memory") +#define cmm_wmb() asm volatile("sfence"::: "memory") +#define cmm_smp_rmb() cmm_barrier() +#define cmm_smp_wmb() cmm_barrier() #else /* - * Some non-Intel clones support out of order store. cmm_wmb() ceases to be a - * nop for these. + * We leave smp_rmb/smp_wmb as full barriers for processors that do not have + * fence instructions. + * + * An empty cmm_smp_rmb() may not be enough on old PentiumPro multiprocessor + * systems, due to an erratum. The Linux kernel says that "Even distro + * kernels should think twice before enabling this", but for now let's + * be conservative and leave the full barrier on 32-bit processors. Also, + * IDT WinChip supports weak store ordering, and the kernel may enable it + * under our feet; cmm_smp_wmb() ceases to be a nop for these processors. 
*/ #define cmm_mb() asm volatile("lock; addl $0,0(%%esp)":::"memory") #define cmm_rmb() asm volatile("lock; addl $0,0(%%esp)":::"memory") diff --git a/urcu/list.h b/urcu/list.h index 7d2a9a1..04dad80 100644 --- a/urcu/list.h +++ b/urcu/list.h @@ -126,12 +126,15 @@ cds_list_splice (struct cds_list_head *add, struct cds_list_head *head) } } - /* Get typed element from list at a given position. */ #define cds_list_entry(ptr, type, member) \ ((type *) ((char *) (ptr) - (unsigned long) (&((type *) 0)->member))) +/* Get first entry from a list. */ +#define cds_list_first_entry(ptr, type, member) \ + cds_list_entry((ptr)->next, type, member) + /* Iterate forward over the elements of the list. */ #define cds_list_for_each(pos, head) \