call_rcu: use cpu affinity for per-cpu call_rcu threads
author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Mon, 6 Jun 2011 21:24:42 +0000 (17:24 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Mon, 6 Jun 2011 21:24:42 +0000 (17:24 -0400)
I played a bit with the call_rcu() implementation alongside my
rbtree tests, and noticed the following:

If I use per-cpu call_rcu threads with the URCU_CALL_RCU_RT flag and only
one updater thread for my rbtree (no reader), I get 38365 updates/s.
If I add cpu affinity to these per-cpu call_rcu threads (I have prepared
a patch that does this), it jumps to 54219 updates/s.  So it looks like
keeping per-cpu affinity for the call_rcu thread is a good thing.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
API.txt
tests/rcutorture.h
urcu-call-rcu-impl.h
urcu-call-rcu.h
urcu-qsbr.c
urcu.c

diff --git a/API.txt b/API.txt
index 162ca9397bedb715d264fc16f237fff667868ce2..a7cd5a2186abd8089b7e11d6f5df2f719eeb889d 100644 (file)
--- a/API.txt
+++ b/API.txt
@@ -59,12 +59,15 @@ void call_rcu(struct rcu_head *head,
 
                call_rcu(&p->rcu, func);
 
-struct call_rcu_data *create_call_rcu_data(unsigned long flags);
+struct call_rcu_data *create_call_rcu_data(unsigned long flags,
+                                          int cpu_affinity);
 
        Returns a handle that can be passed to the following
        primitives.  The "flags" argument can be zero, or can be
        URCU_CALL_RCU_RT if the worker threads associated with the
-       new helper thread are to get real-time response.
+       new helper thread are to get real-time response. The argument
+       "cpu_affinity" specifies a cpu on which the call_rcu thread should
+       be affined to. It is ignored if negative.
 
 struct call_rcu_data *get_default_call_rcu_data(void);
 
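diff --git a/tests/rcutorture.h b/tests/rcutorture.h
index aba74b0bcdadc70971af0e5e66225cfcaa034a62..c5253d9cfead570db75313d237cd28b12b42e704 100644 (file)
--- a/tests/rcutorture.h
+++ b/tests/rcutorture.h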
@@ -156,7 +156,7 @@ void *rcu_update_perf_test(void *arg)
        if ((random() & 0xf00) == 0) {
                struct call_rcu_data *crdp;
 
-               crdp = create_call_rcu_data(0);
+               crdp = create_call_rcu_data(0, -1);
                if (crdp != NULL) {
                        fprintf(stderr,
                                "Using per-thread call_rcu() worker.\n");
@@ -385,7 +385,7 @@ void *rcu_fake_update_stress_test(void *arg)
        if ((random() & 0xf00) == 0) {
                struct call_rcu_data *crdp;
 
-               crdp = create_call_rcu_data(0);
+               crdp = create_call_rcu_data(0, -1);
                if (crdp != NULL) {
                        fprintf(stderr,
                                "Using per-thread call_rcu() worker.\n");
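diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h
index 9fa6aa663e157a226bd0d28cc05566b5b4dcc6bf..f1e46fe3e7494cb6c463bbc76b2b11bdabf2a947 100644 (file)
--- a/urcu-call-rcu-impl.h
+++ b/urcu-call-rcu-impl.h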
@@ -20,6 +20,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <pthread.h>
 #include <signal.h>
@@ -31,6 +32,7 @@
 #include <sys/time.h>
 #include <syscall.h>
 #include <unistd.h>
+#include <sched.h>
 
 #include "config.h"
 #include "urcu/wfqueue.h"
@@ -47,6 +49,7 @@ struct call_rcu_data {
        pthread_cond_t cond;
        unsigned long qlen;
        pthread_t tid;
+       int cpu_affinity;
        struct cds_list_head list;
 } __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 
@@ -146,6 +149,31 @@ static void call_rcu_unlock(pthread_mutex_t *pmp)
        }
 }
 
+#if HAVE_SCHED_SETAFFINITY
+static
+int set_thread_cpu_affinity(struct call_rcu_data *crdp)
+{
+       cpu_set_t mask;
+
+       if (crdp->cpu_affinity < 0)
+               return 0;
+
+       CPU_ZERO(&mask);
+       CPU_SET(crdp->cpu_affinity, &mask);
+#if SCHED_SETAFFINITY_ARGS == 2
+       return sched_setaffinity(0, &mask);
+#else
+       return sched_setaffinity(0, sizeof(mask), &mask);
+#endif
+}
+#else
+static
+int set_thread_cpu_affinity(struct call_rcu_data *crdp)
+{
+       return 0;
+}
+#endif
+
 /* This is the code run by each call_rcu thread. */
 
 static void *call_rcu_thread(void *arg)
@@ -156,6 +184,11 @@ static void *call_rcu_thread(void *arg)
        struct call_rcu_data *crdp = (struct call_rcu_data *)arg;
        struct rcu_head *rhp;
 
+       if (set_thread_cpu_affinity(crdp) != 0) {
+               perror("pthread_setaffinity_np");
+               exit(-1);
+       }
+
        thread_call_rcu_data = crdp;
        for (;;) {
                if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) {
@@ -214,7 +247,8 @@ static void *call_rcu_thread(void *arg)
  */
 
 static void call_rcu_data_init(struct call_rcu_data **crdpp,
-                              unsigned long flags)
+                              unsigned long flags,
+                              int cpu_affinity)
 {
        struct call_rcu_data *crdp;
 
@@ -236,6 +270,7 @@ static void call_rcu_data_init(struct call_rcu_data **crdpp,
        }
        crdp->flags = flags | URCU_CALL_RCU_RUNNING;
        cds_list_add(&crdp->list, &call_rcu_data_list);
+       crdp->cpu_affinity = cpu_affinity;
        cmm_smp_mb();  /* Structure initialized before pointer is planted. */
        *crdpp = crdp;
        if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) {
@@ -280,20 +315,22 @@ pthread_t get_call_rcu_thread(struct call_rcu_data *crdp)
  * Create a call_rcu_data structure (with thread) and return a pointer.
  */
 
-static struct call_rcu_data *__create_call_rcu_data(unsigned long flags)
+static struct call_rcu_data *__create_call_rcu_data(unsigned long flags,
+                                                   int cpu_affinity)
 {
        struct call_rcu_data *crdp;
 
-       call_rcu_data_init(&crdp, flags);
+       call_rcu_data_init(&crdp, flags, cpu_affinity);
        return crdp;
 }
 
-struct call_rcu_data *create_call_rcu_data(unsigned long flags)
+struct call_rcu_data *create_call_rcu_data(unsigned long flags,
+                                          int cpu_affinity)
 {
        struct call_rcu_data *crdp;
 
        call_rcu_lock(&call_rcu_mutex);
-       crdp = __create_call_rcu_data(flags);
+       crdp = __create_call_rcu_data(flags, cpu_affinity);
        call_rcu_unlock(&call_rcu_mutex);
        return crdp;
 }
@@ -346,7 +383,7 @@ struct call_rcu_data *get_default_call_rcu_data(void)
                call_rcu_unlock(&call_rcu_mutex);
                return default_call_rcu_data;
        }
-       call_rcu_data_init(&default_call_rcu_data, 0);
+       call_rcu_data_init(&default_call_rcu_data, 0, -1);
        call_rcu_unlock(&call_rcu_mutex);
        return default_call_rcu_data;
 }
@@ -434,7 +471,7 @@ int create_all_cpu_call_rcu_data(unsigned long flags)
                        call_rcu_unlock(&call_rcu_mutex);
                        continue;
                }
-               crdp = __create_call_rcu_data(flags);
+               crdp = __create_call_rcu_data(flags, i);
                if (crdp == NULL) {
                        call_rcu_unlock(&call_rcu_mutex);
                        errno = ENOMEM;
diff --git a/urcu-call-rcu.h b/urcu-call-rcu.h
index a841b88e3d7decbc241ac28533c5776b3347cfb4..e76a844018f2e24a129ab1e3db957ec90f6cdf40 100644 (file)
--- a/urcu-call-rcu.h
+++ b/urcu-call-rcu.h
@@ -64,7 +64,8 @@ struct rcu_head {
  */
 struct call_rcu_data *get_cpu_call_rcu_data(int cpu);
 pthread_t get_call_rcu_thread(struct call_rcu_data *crdp);
-struct call_rcu_data *create_call_rcu_data(unsigned long flags);
+struct call_rcu_data *create_call_rcu_data(unsigned long flags,
+                                          int cpu_affinity);
 int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp);
 struct call_rcu_data *get_default_call_rcu_data(void);
 struct call_rcu_data *get_call_rcu_data(void);
diff --git a/urcu-qsbr.c b/urcu-qsbr.c
index cf8b5cec46d2c10afaff8c53a56e9adfa4e27eb1..a4c320acb48b1b91f7da33e92b9fb82b157d0265 100644 (file)
--- a/urcu-qsbr.c
+++ b/urcu-qsbr.c
@@ -23,6 +23,7 @@
  * IBM's contributions to this file may be relicensed under LGPLv2 or later.
  */
 
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <pthread.h>
 #include <signal.h>
diff --git a/urcu.c b/urcu.c
index d356f54648d551b44316d8d11cbeb09cf7c9b84d..fa7499c20e649f232a841f5c5e7699b9eafad640 100644 (file)
--- a/urcu.c
+++ b/urcu.c
@@ -24,6 +24,7 @@
  */
 
 #define _BSD_SOURCE
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <pthread.h>
 #include <signal.h>
This page took 0.0288 seconds and 4 git commands to generate.