Merge branch 'master' into urcu/ht-shrink-help
author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 13 Sep 2011 21:34:19 +0000 (17:34 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 13 Sep 2011 21:34:19 +0000 (17:34 -0400)
Conflicts:
urcu-qsbr.c

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
tests/Makefile.am
tests/test_urcu_lfq.c
tests/test_urcu_lfs.c
urcu-call-rcu-impl.h
urcu-qsbr.c
urcu/arch/generic.h
urcu/arch/ppc.h
urcu/arch/x86.h
urcu/list.h

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9e5940f749b8e335961785a556b4de1d1a2d8772..7c638ed452c68e857705c62a213b0ae76f4de25c 100644 (file)
@@ -165,8 +165,8 @@ test_urcu_wfq_dynlink_SOURCES = test_urcu_wfq.c
 test_urcu_wfq_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS)
 test_urcu_wfq_dynlink_LDADD = $(URCU_COMMON_LIB)
 
-test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU_CDS_LIB) $(URCU_DEFER)
-test_urcu_lfs_dynlink_SOURCES = test_urcu_lfs.c $(URCU_DEFER)
+test_urcu_lfs_SOURCES = test_urcu_lfs.c $(URCU) $(URCU_CDS_LIB)
+test_urcu_lfs_dynlink_SOURCES = test_urcu_lfs.c $(URCU)
 test_urcu_lfs_dynlink_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS)
 test_urcu_lfs_dynlink_LDADD = $(URCU_CDS_LIB)
 
diff --git a/tests/test_urcu_lfq.c b/tests/test_urcu_lfq.c
index b61a7d4815788af2bcd9e197dc319a8eb053791a..5292ebd4c4f35e673b7e52588cc526c326fcb6be 100644 (file)
@@ -157,6 +157,11 @@ static unsigned long long __thread nr_successful_enqueues;
 static unsigned int nr_enqueuers;
 static unsigned int nr_dequeuers;
 
+struct test {
+       struct cds_lfq_node_rcu list;
+       struct rcu_head rcu;
+};
+
 static struct cds_lfq_queue_rcu q;
 
 void *thr_enqueuer(void *_count)
@@ -176,12 +181,12 @@ void *thr_enqueuer(void *_count)
        cmm_smp_mb();
 
        for (;;) {
-               struct cds_lfq_node_rcu *node = malloc(sizeof(*node));
+               struct test *node = malloc(sizeof(*node));
                if (!node)
                        goto fail;
-               cds_lfq_node_init_rcu(node);
+               cds_lfq_node_init_rcu(&node->list);
                rcu_read_lock();
-               cds_lfq_enqueue_rcu(&q, node);
+               cds_lfq_enqueue_rcu(&q, &node->list);
                rcu_read_unlock();
                nr_successful_enqueues++;
 
@@ -205,6 +210,14 @@ fail:
 
 }
 
+static
+void free_node_cb(struct rcu_head *head)
+{
+       struct test *node =
+               caa_container_of(head, struct test, rcu);
+       free(node);
+}
+
 void *thr_dequeuer(void *_count)
 {
        unsigned long long *count = _count;
@@ -228,14 +241,16 @@ void *thr_dequeuer(void *_count)
        cmm_smp_mb();
 
        for (;;) {
-               struct cds_lfq_node_rcu *node;
+               struct cds_lfq_node_rcu *qnode;
+               struct test *node;
 
                rcu_read_lock();
-               node = cds_lfq_dequeue_rcu(&q);
+               qnode = cds_lfq_dequeue_rcu(&q);
+               node = caa_container_of(qnode, struct test, list);
                rcu_read_unlock();
 
                if (node) {
-                       defer_rcu(free, node);
+                       call_rcu(&node->rcu, free_node_cb);
                        nr_successful_dequeues++;
                }
 
@@ -259,17 +274,18 @@ void *thr_dequeuer(void *_count)
 
 void test_end(struct cds_lfq_queue_rcu *q, unsigned long long *nr_dequeues)
 {
-       struct cds_lfq_node_rcu *node;
+       struct cds_lfq_node_rcu *snode;
 
        do {
-               rcu_read_lock();
-               node = cds_lfq_dequeue_rcu(q);
-               rcu_read_unlock();
-               if (node) {
+               snode = cds_lfq_dequeue_rcu(q);
+               if (snode) {
+                       struct test *node;
+
+                       node = caa_container_of(snode, struct test, list);
                        free(node);     /* no more concurrent access */
                        (*nr_dequeues)++;
                }
-       } while (node);
+       } while (snode);
 }
 
 void show_usage(int argc, char **argv)
@@ -364,6 +380,8 @@ int main(int argc, char **argv)
        count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers);
        count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers);
        cds_lfq_init_rcu(&q, call_rcu);
+       err = create_all_cpu_call_rcu_data(0);
+       assert(!err);
 
        next_aff = 0;
 
@@ -432,6 +450,7 @@ int main(int argc, char **argv)
                       tot_successful_enqueues,
                       tot_successful_dequeues + end_dequeues);
 
+       free_all_cpu_call_rcu_data();
        free(count_enqueuer);
        free(count_dequeuer);
        free(tid_enqueuer);
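
Note on the reclamation change above: the lfq test (and the lfs test below) now wraps each queue element in a struct that embeds both the cds_lfq_node_rcu linkage and a struct rcu_head, so freeing can be deferred with call_rcu() instead of defer_rcu(). A minimal sketch of that pattern outside the test harness, assuming liburcu's <urcu.h> (which provides call_rcu() for this flavor) and <urcu/rculfqueue.h>; struct item, item_free_cb() and consume_one() are illustrative names, not library API:

#include <stdlib.h>
#include <urcu.h>
#include <urcu/rculfqueue.h>

struct item {
	struct cds_lfq_node_rcu qnode;	/* queue linkage */
	struct rcu_head rcu;		/* reclamation handle */
	int payload;
};

static void item_free_cb(struct rcu_head *head)
{
	/* Recover the enclosing item from its embedded rcu_head. */
	free(caa_container_of(head, struct item, rcu));
}

static void consume_one(struct cds_lfq_queue_rcu *q)
{
	struct cds_lfq_node_rcu *qnode;

	rcu_read_lock();
	qnode = cds_lfq_dequeue_rcu(q);
	rcu_read_unlock();
	if (qnode) {
		struct item *item = caa_container_of(qnode, struct item, qnode);

		/* Free only after all pre-existing readers are done. */
		call_rcu(&item->rcu, item_free_cb);
	}
}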
diff --git a/tests/test_urcu_lfs.c b/tests/test_urcu_lfs.c
index 252454d29aeb46d4758e30b2a7f6b864cc8e3b65..c85fa447c37afd7eb48da32f1c10811e57fd9d85 100644 (file)
@@ -157,6 +157,11 @@ static unsigned long long __thread nr_successful_enqueues;
 static unsigned int nr_enqueuers;
 static unsigned int nr_dequeuers;
 
+struct test {
+       struct cds_lfs_node_rcu list;
+       struct rcu_head rcu;
+};
+
 static struct cds_lfs_stack_rcu s;
 
 void *thr_enqueuer(void *_count)
@@ -176,12 +181,12 @@ void *thr_enqueuer(void *_count)
        cmm_smp_mb();
 
        for (;;) {
-               struct cds_lfs_node_rcu *node = malloc(sizeof(*node));
+               struct test *node = malloc(sizeof(*node));
                if (!node)
                        goto fail;
-               cds_lfs_node_init_rcu(node);
+               cds_lfs_node_init_rcu(&node->list);
                /* No rcu read-side is needed for push */
-               cds_lfs_push_rcu(&s, node);
+               cds_lfs_push_rcu(&s, &node->list);
                nr_successful_enqueues++;
 
                if (unlikely(wdelay))
@@ -204,6 +209,14 @@ fail:
 
 }
 
+static
+void free_node_cb(struct rcu_head *head)
+{
+       struct test *node =
+               caa_container_of(head, struct test, rcu);
+       free(node);
+}
+
 void *thr_dequeuer(void *_count)
 {
        unsigned long long *count = _count;
@@ -227,13 +240,15 @@ void *thr_dequeuer(void *_count)
        cmm_smp_mb();
 
        for (;;) {
-               struct cds_lfs_node_rcu *node;
+               struct cds_lfs_node_rcu *snode;
+               struct test *node;
 
                rcu_read_lock();
-               node = cds_lfs_pop_rcu(&s);
+               snode = cds_lfs_pop_rcu(&s);
+               node = caa_container_of(snode, struct test, list);
                rcu_read_unlock();
                if (node) {
-                       defer_rcu(free, node);
+                       call_rcu(&node->rcu, free_node_cb);
                        nr_successful_dequeues++;
                }
                nr_dequeues++;
@@ -257,15 +272,18 @@ void *thr_dequeuer(void *_count)
 
 void test_end(struct cds_lfs_stack_rcu *s, unsigned long long *nr_dequeues)
 {
-       struct cds_lfs_node_rcu *node;
+       struct cds_lfs_node_rcu *snode;
 
        do {
-               node = cds_lfs_pop_rcu(s);
-               if (node) {
+               snode = cds_lfs_pop_rcu(s);
+               if (snode) {
+                       struct test *node;
+
+                       node = caa_container_of(snode, struct test, list);
                        free(node);
                        (*nr_dequeues)++;
                }
-       } while (node);
+       } while (snode);
 }
 
 void show_usage(int argc, char **argv)
@@ -360,6 +378,8 @@ int main(int argc, char **argv)
        count_enqueuer = malloc(2 * sizeof(*count_enqueuer) * nr_enqueuers);
        count_dequeuer = malloc(2 * sizeof(*count_dequeuer) * nr_dequeuers);
        cds_lfs_init_rcu(&s);
+       err = create_all_cpu_call_rcu_data(0);
+       assert(!err);
 
        next_aff = 0;
 
@@ -426,6 +446,7 @@ int main(int argc, char **argv)
                       tot_successful_enqueues,
                       tot_successful_dequeues + end_dequeues);
 
+       free_all_cpu_call_rcu_data();
        free(count_enqueuer);
        free(count_dequeuer);
        free(tid_enqueuer);
diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h
index 38cc00190f018cf1afd7fac07e42bcedec484af3..c14cc18bde825c6c3cfec1557222e764c88f85a9 100644 (file)
@@ -482,7 +482,9 @@ void set_thread_call_rcu_data(struct call_rcu_data *crdp)
 /*
  * Create a separate call_rcu thread for each CPU.  This does not
  * replace a pre-existing call_rcu thread -- use the set_cpu_call_rcu_data()
- * function if you want that behavior.
+ * function if you want that behavior. Should be paired with
+ * free_all_cpu_call_rcu_data() to teardown these call_rcu worker
+ * threads.
  */
 
 int create_all_cpu_call_rcu_data(unsigned long flags)
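
The expanded comment documents the lifecycle both tests above now follow in main(). A hedged sketch of that pairing, assuming the default urcu flavor and this era's urcu-call-rcu.h header; the actual work between setup and teardown is elided:

#include <assert.h>
#include <urcu.h>
#include <urcu-call-rcu.h>

int main(void)
{
	int err;

	rcu_register_thread();

	/* One call_rcu worker thread per CPU (flags == 0). */
	err = create_all_cpu_call_rcu_data(0);
	assert(!err);

	/* ... spawn readers/writers, call_rcu(), synchronize_rcu() ... */

	/* Tear the per-CPU call_rcu workers back down before exiting. */
	free_all_cpu_call_rcu_data();

	rcu_unregister_thread();
	return 0;
}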
diff --git a/urcu-qsbr.c b/urcu-qsbr.c
index 1dc99792d7a49f2f6f3614693e34cde239312bfb..1adaa9409926edb602302584347edf3febd7c7fd 100644 (file)
@@ -208,21 +208,17 @@ void synchronize_rcu(void)
        was_online = rcu_reader.ctr;
 
        /* All threads should read qparity before accessing data structure
-        * where new ptr points to.
-        */
-       /* Write new ptr before changing the qparity */
-       cmm_smp_mb();
-
-       /*
+        * where new ptr points to.  In the "then" case, rcu_thread_offline
+        * includes a memory barrier.
+        *
         * Mark the writer thread offline to make sure we don't wait for
         * our own quiescent state. This allows using synchronize_rcu()
         * in threads registered as readers.
         */
-       if (was_online) {
-               CMM_STORE_SHARED(rcu_reader.ctr, 0);
-               cmm_smp_mb();   /* write rcu_reader.ctr before read futex */
-               wake_up_gp();
-       }
+       if (was_online)
+               rcu_thread_offline();
+       else
+               cmm_smp_mb();
 
        mutex_lock(&rcu_gp_lock);
 
@@ -263,9 +259,9 @@ out:
         * freed.
         */
        if (was_online)
-               _CMM_STORE_SHARED(rcu_reader.ctr,
-                                 CMM_LOAD_SHARED(rcu_gp_ctr));
-       cmm_smp_mb();
+               rcu_thread_online();
+       else
+               cmm_smp_mb();
 }
 #else /* !(CAA_BITS_PER_LONG < 64) */
 void synchronize_rcu(void)
@@ -279,12 +275,10 @@ void synchronize_rcu(void)
         * our own quiescent state. This allows using synchronize_rcu()
         * in threads registered as readers.
         */
-       cmm_smp_mb();
-       if (was_online) {
-               CMM_STORE_SHARED(rcu_reader.ctr, 0);
-               cmm_smp_mb();   /* write rcu_reader.ctr before read futex */
-               wake_up_gp();
-       }
+       if (was_online)
+               rcu_thread_offline();
+       else
+               cmm_smp_mb();
 
        mutex_lock(&rcu_gp_lock);
        if (cds_list_empty(&registry))
@@ -294,9 +288,9 @@ out:
        mutex_unlock(&rcu_gp_lock);
 
        if (was_online)
-               _CMM_STORE_SHARED(rcu_reader.ctr,
-                                 CMM_LOAD_SHARED(rcu_gp_ctr));
-       cmm_smp_mb();
+               rcu_thread_online();
+       else
+               cmm_smp_mb();
 }
 #endif  /* !(CAA_BITS_PER_LONG < 64) */
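
For context, rcu_thread_offline() and rcu_thread_online() are the same primitives QSBR reader threads already use around long blocking periods; the rewrite above reuses them (and the memory barriers they contain) instead of open-coding the rcu_reader.ctr stores. An illustrative reader loop, assuming the urcu-qsbr flavor; have_more_work(), do_work() and wait_for_input() stand in for application logic:

#include <stddef.h>
#include <urcu-qsbr.h>

/* Placeholders for application logic, not liburcu API. */
extern int have_more_work(void);
extern void do_work(void);
extern void wait_for_input(void);

void *reader_thread(void *arg)
{
	(void) arg;
	rcu_register_thread();

	while (have_more_work()) {
		rcu_read_lock();	/* no-op under QSBR, documents intent */
		do_work();
		rcu_read_unlock();

		/* Report a quiescent state from time to time. */
		rcu_quiescent_state();

		/*
		 * Go offline before blocking so synchronize_rcu() does not
		 * wait for this thread; offline/online include the barriers
		 * the open-coded sequence used to provide.
		 */
		rcu_thread_offline();
		wait_for_input();
		rcu_thread_online();
	}

	rcu_unregister_thread();
	return NULL;
}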
 
diff --git a/urcu/arch/generic.h b/urcu/arch/generic.h
index 100d3c6c12948a2f88bc769f0dc26325a1036027..1ea7f59ca396d0c13d49372ccb26976bbed2d2d8 100644 (file)
@@ -100,22 +100,50 @@ extern "C" {
 #endif
 
 #ifdef CONFIG_RCU_SMP
+#ifndef cmm_smp_mb
 #define cmm_smp_mb()   cmm_mb()
+#endif
+#ifndef cmm_smp_rmb
 #define cmm_smp_rmb()  cmm_rmb()
+#endif
+#ifndef cmm_smp_wmb
 #define cmm_smp_wmb()  cmm_wmb()
+#endif
+#ifndef cmm_smp_mc
 #define cmm_smp_mc()   cmm_mc()
+#endif
+#ifndef cmm_smp_rmc
 #define cmm_smp_rmc()  cmm_rmc()
+#endif
+#ifndef cmm_smp_wmc
 #define cmm_smp_wmc()  cmm_wmc()
+#endif
+#ifndef cmm_smp_read_barrier_depends
 #define cmm_smp_read_barrier_depends() cmm_read_barrier_depends()
+#endif
 #else
+#ifndef cmm_smp_mb
 #define cmm_smp_mb()   cmm_barrier()
+#endif
+#ifndef cmm_smp_rmb
 #define cmm_smp_rmb()  cmm_barrier()
+#endif
+#ifndef cmm_smp_wmb
 #define cmm_smp_wmb()  cmm_barrier()
+#endif
+#ifndef cmm_smp_mc
 #define cmm_smp_mc()   cmm_barrier()
+#endif
+#ifndef cmm_smp_rmc
 #define cmm_smp_rmc()  cmm_barrier()
+#endif
+#ifndef cmm_smp_wmc
 #define cmm_smp_wmc()  cmm_barrier()
+#endif
+#ifndef cmm_smp_read_barrier_depends
 #define cmm_smp_read_barrier_depends()
 #endif
+#endif
 
 #ifndef caa_cpu_relax
 #define caa_cpu_relax()                cmm_barrier()
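
The new #ifndef guards let an architecture header override any single cmm_smp_* barrier and inherit the generic fallbacks for the rest -- exactly what the x86 change below relies on to relax cmm_smp_rmb()/cmm_smp_wmb() to compiler barriers. A sketch of the override idiom for a hypothetical port ("myarch" is not a real liburcu architecture):

/* urcu/arch/myarch.h (hypothetical) */
#define cmm_mb()	__sync_synchronize()

/*
 * Suppose loads are already ordered strongly enough on this CPU that a
 * compiler barrier suffices for the SMP read barrier.
 */
#define cmm_smp_rmb()	cmm_barrier()

/* Everything left undefined falls back to the generic definitions. */
#include <urcu/arch/generic.h>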
diff --git a/urcu/arch/ppc.h b/urcu/arch/ppc.h
index d7317bb327c72afa48f087f50251cbe311a2ff62..a03d688837344f0bb2d438c7161d43dcb208a5ef 100644 (file)
@@ -48,11 +48,24 @@ extern "C" {
                rval;                                   \
        })
 
+#define mftb()                                         \
+       ({                                              \
+               unsigned long long rval;                \
+               asm volatile("mftb %0" : "=r" (rval));  \
+               rval;                                   \
+       })
+
 typedef unsigned long long cycles_t;
 
-static inline cycles_t caa_get_cycles (void)
+#ifdef __powerpc64__
+static inline cycles_t caa_get_cycles(void)
 {
-       long h, l;
+       return (cycles_t) mftb();
+}
+#else
+static inline cycles_t caa_get_cycles(void)
+{
+       unsigned long h, l;
 
        for (;;) {
                h = mftbu();
@@ -63,6 +76,7 @@ static inline cycles_t caa_get_cycles (void)
                        return (((cycles_t) h) << 32) + l;
        }
 }
+#endif
 
 #ifdef __cplusplus 
 }
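
With this change the 64-bit build reads the whole timebase with a single mftb, while the 32-bit build keeps the mftbu/mftb/mftbu retry loop to obtain a consistent 64-bit value; callers only ever see caa_get_cycles(). A small, hypothetical timing helper built on the portable urcu/arch.h entry point:

#include <stdio.h>
#include <urcu/arch.h>

/* time_section() is illustrative, not part of liburcu. */
static void time_section(void (*fn)(void))
{
	cycles_t start, end;

	start = caa_get_cycles();
	fn();
	end = caa_get_cycles();
	printf("section took %llu cycles/timebase ticks\n",
	       (unsigned long long) (end - start));
}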
diff --git a/urcu/arch/x86.h b/urcu/arch/x86.h
index 9e5411fd4453ee7033d15e0280fdf9208d8b3f50..c1e2e072ff752582a93e90cb7189e45b14283870 100644 (file)
@@ -33,12 +33,27 @@ extern "C" {
 
 #ifdef CONFIG_RCU_HAVE_FENCE
 #define cmm_mb()    asm volatile("mfence":::"memory")
-#define cmm_rmb()   asm volatile("lfence":::"memory")
-#define cmm_wmb()   asm volatile("sfence"::: "memory")
+
+/*
+ * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when
+ * using SSE or working with I/O areas.  cmm_smp_rmb/cmm_smp_wmb are
+ * only compiler barriers, which is enough for general use.
+ */
+#define cmm_rmb()     asm volatile("lfence":::"memory")
+#define cmm_wmb()     asm volatile("sfence"::: "memory")
+#define cmm_smp_rmb() cmm_barrier()
+#define cmm_smp_wmb() cmm_barrier()
 #else
 /*
- * Some non-Intel clones support out of order store. cmm_wmb() ceases to be a
- * nop for these.
+ * We leave smp_rmb/smp_wmb as full barriers for processors that do not have
+ * fence instructions.
+ *
+ * An empty cmm_smp_rmb() may not be enough on old PentiumPro multiprocessor
+ * systems, due to an erratum.  The Linux kernel says that "Even distro
+ * kernels should think twice before enabling this", but for now let's
+ * be conservative and leave the full barrier on 32-bit processors.  Also,
+ * IDT WinChip supports weak store ordering, and the kernel may enable it
+ * under our feet; cmm_smp_wmb() ceases to be a nop for these processors.
  */
 #define cmm_mb()    asm volatile("lock; addl $0,0(%%esp)":::"memory")
 #define cmm_rmb()   asm volatile("lock; addl $0,0(%%esp)":::"memory")
diff --git a/urcu/list.h b/urcu/list.h
index 7d2a9a11e39b12d061e6a68b90b6918f307af17a..04dad80929ef1d6f2ec1a3adcd302d8f2f962533 100644 (file)
@@ -126,12 +126,15 @@ cds_list_splice (struct cds_list_head *add, struct cds_list_head *head)
     }
 }
 
-
 /* Get typed element from list at a given position.  */
 #define cds_list_entry(ptr, type, member) \
   ((type *) ((char *) (ptr) - (unsigned long) (&((type *) 0)->member)))
 
 
+/* Get first entry from a list. */
+#define cds_list_first_entry(ptr, type, member) \
+       cds_list_entry((ptr)->next, type, member)
+
 
 /* Iterate forward over the elements of the list.  */
 #define cds_list_for_each(pos, head) \
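
cds_list_first_entry() mirrors the kernel's list_first_entry(); like it, it is only meaningful on a non-empty list. A minimal usage sketch, with struct job, job_list and peek_first_job() as hypothetical names:

#include <stddef.h>
#include <urcu/list.h>

struct job {
	int id;
	struct cds_list_head node;
};

static CDS_LIST_HEAD(job_list);

static struct job *peek_first_job(void)
{
	if (cds_list_empty(&job_list))
		return NULL;
	/* Same as cds_list_entry(job_list.next, struct job, node). */
	return cds_list_first_entry(&job_list, struct job, node);
}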