From c1d2c60b1c754815d27f29705d5f2077c0900148 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 6 Jun 2011 17:24:42 -0400 Subject: [PATCH] call_rcu: use cpu affinity for per-cpu call_rcu threads I played a bit with the call_rcu() implementation alongside my rbtree tests, and noticed the following: If I use per-cpu call_rcu threads with the URCU_CALL_RCU_RT flag, with one updater thread only for my rbtree (no reader), I get 38365 updates/s. If I add cpu affinity to these per-cpu call_rcu threads (I have prepared a patch that does this), it jumps to 54219 updates/s. So it looks like keeping per-cpu affinity for the call_rcu thread is a good thing. Signed-off-by: Mathieu Desnoyers --- API.txt | 7 ++++-- tests/rcutorture.h | 4 ++-- urcu-call-rcu-impl.h | 51 ++++++++++++++++++++++++++++++++++++++------ urcu-call-rcu.h | 3 ++- urcu-qsbr.c | 1 + urcu.c | 1 + 6 files changed, 55 insertions(+), 12 deletions(-) diff --git a/API.txt b/API.txt index 162ca93..a7cd5a2 100644 --- a/API.txt +++ b/API.txt @@ -59,12 +59,15 @@ void call_rcu(struct rcu_head *head, call_rcu(&p->rcu, func); -struct call_rcu_data *create_call_rcu_data(unsigned long flags); +struct call_rcu_data *create_call_rcu_data(unsigned long flags, + int cpu_affinity); Returns a handle that can be passed to the following primitives. The "flags" argument can be zero, or can be URCU_CALL_RCU_RT if the worker threads associated with the - new helper thread are to get real-time response. + new helper thread are to get real-time response. The argument + "cpu_affinity" specifies a cpu on which the call_rcu thread should + be affined to. It is ignored if negative. 
struct call_rcu_data *get_default_call_rcu_data(void); diff --git a/tests/rcutorture.h b/tests/rcutorture.h index aba74b0..c5253d9 100644 --- a/tests/rcutorture.h +++ b/tests/rcutorture.h @@ -156,7 +156,7 @@ void *rcu_update_perf_test(void *arg) if ((random() & 0xf00) == 0) { struct call_rcu_data *crdp; - crdp = create_call_rcu_data(0); + crdp = create_call_rcu_data(0, -1); if (crdp != NULL) { fprintf(stderr, "Using per-thread call_rcu() worker.\n"); @@ -385,7 +385,7 @@ void *rcu_fake_update_stress_test(void *arg) if ((random() & 0xf00) == 0) { struct call_rcu_data *crdp; - crdp = create_call_rcu_data(0); + crdp = create_call_rcu_data(0, -1); if (crdp != NULL) { fprintf(stderr, "Using per-thread call_rcu() worker.\n"); diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h index 9fa6aa6..f1e46fe 100644 --- a/urcu-call-rcu-impl.h +++ b/urcu-call-rcu-impl.h @@ -20,6 +20,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#define _GNU_SOURCE #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include "config.h" #include "urcu/wfqueue.h" @@ -47,6 +49,7 @@ struct call_rcu_data { pthread_cond_t cond; unsigned long qlen; pthread_t tid; + int cpu_affinity; struct cds_list_head list; } __attribute__((aligned(CAA_CACHE_LINE_SIZE))); @@ -146,6 +149,31 @@ static void call_rcu_unlock(pthread_mutex_t *pmp) } } +#if HAVE_SCHED_SETAFFINITY +static +int set_thread_cpu_affinity(struct call_rcu_data *crdp) +{ + cpu_set_t mask; + + if (crdp->cpu_affinity < 0) + return 0; + + CPU_ZERO(&mask); + CPU_SET(crdp->cpu_affinity, &mask); +#if SCHED_SETAFFINITY_ARGS == 2 + return sched_setaffinity(0, &mask); +#else + return sched_setaffinity(0, sizeof(mask), &mask); +#endif +} +#else +static +int set_thread_cpu_affinity(struct call_rcu_data *crdp) +{ + return 0; +} +#endif + /* This is the code run by each call_rcu thread. 
*/ static void *call_rcu_thread(void *arg) @@ -156,6 +184,11 @@ static void *call_rcu_thread(void *arg) struct call_rcu_data *crdp = (struct call_rcu_data *)arg; struct rcu_head *rhp; + if (set_thread_cpu_affinity(crdp) != 0) { + perror("pthread_setaffinity_np"); + exit(-1); + } + thread_call_rcu_data = crdp; for (;;) { if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) { @@ -214,7 +247,8 @@ static void *call_rcu_thread(void *arg) */ static void call_rcu_data_init(struct call_rcu_data **crdpp, - unsigned long flags) + unsigned long flags, + int cpu_affinity) { struct call_rcu_data *crdp; @@ -236,6 +270,7 @@ static void call_rcu_data_init(struct call_rcu_data **crdpp, } crdp->flags = flags | URCU_CALL_RCU_RUNNING; cds_list_add(&crdp->list, &call_rcu_data_list); + crdp->cpu_affinity = cpu_affinity; cmm_smp_mb(); /* Structure initialized before pointer is planted. */ *crdpp = crdp; if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) { @@ -280,20 +315,22 @@ pthread_t get_call_rcu_thread(struct call_rcu_data *crdp) * Create a call_rcu_data structure (with thread) and return a pointer. 
*/ -static struct call_rcu_data *__create_call_rcu_data(unsigned long flags) +static struct call_rcu_data *__create_call_rcu_data(unsigned long flags, + int cpu_affinity) { struct call_rcu_data *crdp; - call_rcu_data_init(&crdp, flags); + call_rcu_data_init(&crdp, flags, cpu_affinity); return crdp; } -struct call_rcu_data *create_call_rcu_data(unsigned long flags) +struct call_rcu_data *create_call_rcu_data(unsigned long flags, + int cpu_affinity) { struct call_rcu_data *crdp; call_rcu_lock(&call_rcu_mutex); - crdp = __create_call_rcu_data(flags); + crdp = __create_call_rcu_data(flags, cpu_affinity); call_rcu_unlock(&call_rcu_mutex); return crdp; } @@ -346,7 +383,7 @@ struct call_rcu_data *get_default_call_rcu_data(void) call_rcu_unlock(&call_rcu_mutex); return default_call_rcu_data; } - call_rcu_data_init(&default_call_rcu_data, 0); + call_rcu_data_init(&default_call_rcu_data, 0, -1); call_rcu_unlock(&call_rcu_mutex); return default_call_rcu_data; } @@ -434,7 +471,7 @@ int create_all_cpu_call_rcu_data(unsigned long flags) call_rcu_unlock(&call_rcu_mutex); continue; } - crdp = __create_call_rcu_data(flags); + crdp = __create_call_rcu_data(flags, i); if (crdp == NULL) { call_rcu_unlock(&call_rcu_mutex); errno = ENOMEM; diff --git a/urcu-call-rcu.h b/urcu-call-rcu.h index a841b88..e76a844 100644 --- a/urcu-call-rcu.h +++ b/urcu-call-rcu.h @@ -64,7 +64,8 @@ struct rcu_head { */ struct call_rcu_data *get_cpu_call_rcu_data(int cpu); pthread_t get_call_rcu_thread(struct call_rcu_data *crdp); -struct call_rcu_data *create_call_rcu_data(unsigned long flags); +struct call_rcu_data *create_call_rcu_data(unsigned long flags, + int cpu_affinity); int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp); struct call_rcu_data *get_default_call_rcu_data(void); struct call_rcu_data *get_call_rcu_data(void); diff --git a/urcu-qsbr.c b/urcu-qsbr.c index cf8b5ce..a4c320a 100644 --- a/urcu-qsbr.c +++ b/urcu-qsbr.c @@ -23,6 +23,7 @@ * IBM's contributions to this file may be 
relicensed under LGPLv2 or later. */ +#define _GNU_SOURCE #include #include #include diff --git a/urcu.c b/urcu.c index d356f54..fa7499c 100644 --- a/urcu.c +++ b/urcu.c @@ -24,6 +24,7 @@ */ #define _BSD_SOURCE +#define _GNU_SOURCE #include #include #include -- 2.34.1