From 420e5b9208a77041b662c9d9cdc021287f95eec1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 8 Jun 2009 14:41:37 -0400 Subject: [PATCH] Add QSBR RCU timing tests urcu-qsbr can be used for QSBR-based RCU. Currently not built as a full-blown library .so, but could. Signed-off-by: Mathieu Desnoyers --- Makefile | 8 +- test_qsbr_timing.c | 217 ++++++++++++++++++++++++++++++++++ urcu-qsbr.h | 284 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 508 insertions(+), 1 deletion(-) create mode 100644 test_qsbr_timing.c create mode 100644 urcu-qsbr.h diff --git a/Makefile b/Makefile index c022522..371c107 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ SRC_DEP=`echo $^ | sed 's/[^ ]*\.h//g'` all: arch-api test_urcu test_urcu_dynamic_link test_urcu_timing \ test_rwlock_timing test_perthreadlock_timing test_urcu_yield urcu-asm.S \ - urcu-asm.o urcutorture urcutorture-yield liburcu.so + test_qsbr_timing urcu-asm.o urcutorture urcutorture-yield liburcu.so arch-api: api.h arch.h # Run either make pthreads-x86 or make pthreads-ppc prior to build @@ -42,6 +42,9 @@ test_urcu_yield: urcu-yield.o test_urcu.c urcu.h test_urcu_timing: urcu.o test_urcu_timing.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) +test_qsbr_timing: urcu-qsbr.o test_qsbr_timing.c urcu-qsbr.h + $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) + test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h $(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP) @@ -51,6 +54,9 @@ test_perthreadlock_timing: urcu.o test_perthreadlock_timing.c urcu.h urcu.o: urcu.c urcu.h $(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP) +urcu-qsbr.o: urcu-qsbr.c urcu-qsbr.h + $(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP) + liburcu.so: urcu.o $(CC) -fPIC -shared -o $@ $< diff --git a/test_qsbr_timing.c b/test_qsbr_timing.c new file mode 100644 index 0000000..bdbb581 --- /dev/null +++ b/test_qsbr_timing.c @@ -0,0 +1,217 @@ +/* + * test_qsbr_timing.c + * + * Userspace QSBR - test program + * + * Copyright February 2009 - Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_syscall0) +_syscall0(pid_t, gettid) +#elif defined(__NR_gettid) +static inline pid_t gettid(void) +{ + return syscall(__NR_gettid); +} +#else +#warning "use pid as tid" +static inline pid_t gettid(void) +{ + return getpid(); +} +#endif + +#define _LGPL_SOURCE +#include "urcu-qsbr.h" + +pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER; + +void rcu_copy_mutex_lock(void) +{ + int ret; + ret = pthread_mutex_lock(&rcu_copy_mutex); + if (ret) { + perror("Error in pthread mutex lock"); + exit(-1); + } +} + +void rcu_copy_mutex_unlock(void) +{ + int ret; + + ret = pthread_mutex_unlock(&rcu_copy_mutex); + if (ret) { + perror("Error in pthread mutex unlock"); + exit(-1); + } +} + +struct test_array { + int a; +}; + +static struct test_array *test_rcu_pointer; + +#define OUTER_READ_LOOP 2000U +#define INNER_READ_LOOP 100000U +#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP) + +#define OUTER_WRITE_LOOP 10U +#define INNER_WRITE_LOOP 200U +#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP) + +#define NR_READ 10 +#define NR_WRITE 9 + +static cycles_t reader_time[NR_READ] __attribute__((aligned(128))); +static cycles_t writer_time[NR_WRITE] __attribute__((aligned(128))); + +void *thr_reader(void *arg) +{ + int i, j; + struct test_array *local_ptr; + cycles_t time1, time2; + + printf("thread_begin %s, thread id : %lx, tid %lu\n", + "reader", pthread_self(), (unsigned long)gettid()); + sleep(2); + + rcu_register_thread(); + + time1 = get_cycles(); + for (i = 0; i < OUTER_READ_LOOP; i++) { + for (j = 0; j < INNER_READ_LOOP; j++) { + _rcu_read_lock(); + local_ptr = _rcu_dereference(test_rcu_pointer); + if (local_ptr) { + assert(local_ptr->a == 8); + } + _rcu_read_unlock(); + } + _rcu_quiescent_state(); + } + time2 = get_cycles(); + + rcu_unregister_thread(); + + reader_time[(unsigned long)arg] = time2 - time1; + + sleep(2); + printf("thread_end %s, thread id : %lx, tid %lu\n", + "reader", pthread_self(), (unsigned long)gettid()); + return ((void*)1); + +} + +void *thr_writer(void *arg) +{ + int i, j; + struct test_array *new, *old; + cycles_t time1, time2; + + printf("thread_begin %s, thread id : %lx, tid %lu\n", + "writer", pthread_self(), (unsigned long)gettid()); + sleep(2); + + for (i = 0; i < OUTER_WRITE_LOOP; i++) { + for (j = 0; j < INNER_WRITE_LOOP; j++) { + time1 = get_cycles(); + new = malloc(sizeof(struct test_array)); + rcu_copy_mutex_lock(); + old = test_rcu_pointer; + if (old) { + assert(old->a == 8); + } + new->a = 8; + old = _rcu_publish_content(&test_rcu_pointer, new); + rcu_copy_mutex_unlock(); + /* can be done after unlock */ + if (old) { + old->a = 0; + } + free(old); + time2 = get_cycles(); + writer_time[(unsigned long)arg] += time2 - time1; + usleep(1); + } + } + + printf("thread_end %s, thread id : %lx, tid %lu\n", + "writer", pthread_self(), (unsigned long)gettid()); + return ((void*)2); +} + +int main() +{ + int err; + pthread_t tid_reader[NR_READ], tid_writer[NR_WRITE]; + void *tret; + int i; + cycles_t tot_rtime = 0; + cycles_t tot_wtime = 0; + + printf("thread %-6s, thread id : %lx, tid %lu\n", + "main", pthread_self(), (unsigned long)gettid()); + + for (i = 0; i < NR_READ; i++) { + err = pthread_create(&tid_reader[i], NULL, thr_reader, + (void *)(long)i); + if (err != 0) + exit(1); + } + for (i = 0; i < NR_WRITE; i++) { + err = pthread_create(&tid_writer[i], NULL, thr_writer, + (void *)(long)i); + if (err != 0) + exit(1); + } + + sleep(10); + + for (i = 0; i < NR_READ; i++) { + err = pthread_join(tid_reader[i], &tret); + if (err != 0) + exit(1); + tot_rtime += reader_time[i]; + } + for (i = 0; i < NR_WRITE; i++) { + err = pthread_join(tid_writer[i], &tret); + if (err != 0) + exit(1); + tot_wtime += writer_time[i]; + } + free(test_rcu_pointer); + printf("Time per read : %g cycles\n", + (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP)); + printf("Time per write : %g cycles\n", + (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP)); + + return 0; +} diff --git a/urcu-qsbr.h b/urcu-qsbr.h new file mode 100644 index 0000000..101b94d --- /dev/null +++ b/urcu-qsbr.h @@ -0,0 +1,284 @@ +#ifndef _URCU_QSBR_H +#define _URCU_QSBR_H + +/* + * urcu-static.h + * + * Userspace RCU header. + * + * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu.h for linking + * dynamically with the userspace rcu library. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include + +#include +#include + +/* + * Identify a shared load. A smp_rmc() or smp_mc() should come before the load. + */ +#define _LOAD_SHARED(p) ACCESS_ONCE(p) + +/* + * Load a data from shared memory, doing a cache flush if required. + */ +#define LOAD_SHARED(p) \ + ({ \ + smp_rmc(); \ + _LOAD_SHARED(p); \ + }) + +/* + * Identify a shared store. A smp_wmc() or smp_mc() should follow the store. + */ +#define _STORE_SHARED(x, v) ({ ACCESS_ONCE(x) = (v); }) + +/* + * Store v into x, where x is located in shared memory. Performs the required + * cache flush after writing. Returns v. + */ +#define STORE_SHARED(x, v) \ + ({ \ + _STORE_SHARED(x, v); \ + smp_wmc(); \ + (v); \ + }) + +/** + * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable + * into a RCU read-side critical section. The pointer can later be safely + * dereferenced within the critical section. + * + * This ensures that the pointer copy is invariant thorough the whole critical + * section. + * + * Inserts memory barriers on architectures that require them (currently only + * Alpha) and documents which pointers are protected by RCU. + * + * Should match rcu_assign_pointer() or rcu_xchg_pointer(). + */ + +#define _rcu_dereference(p) ({ \ + typeof(p) _________p1 = LOAD_SHARED(p); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) + +/* + * This code section can only be included in LGPL 2.1 compatible source code. + * See below for the function call wrappers which can be used in code meant to + * be only linked with the Userspace RCU library. This comes with a small + * performance degradation on the read-side due to the added function calls. + * This is required to permit relinking with newer versions of the library. + */ + +/* + * The signal number used by the RCU library can be overridden with + * -DSIGURCU= when compiling the library. + */ +#ifndef SIGURCU +#define SIGURCU SIGUSR1 +#endif + +/* + * If a reader is really non-cooperative and refuses to commit its + * urcu_active_readers count to memory (there is no barrier in the reader + * per-se), kick it after a few loops waiting for it. + */ +#define KICK_READER_LOOPS 10000 + +#ifdef DEBUG_YIELD +#include +#include +#include +#include + +#define YIELD_READ (1 << 0) +#define YIELD_WRITE (1 << 1) + +/* Updates without DEBUG_FULL_MB are much slower. Account this in the delay */ +#ifdef DEBUG_FULL_MB +/* maximum sleep delay, in us */ +#define MAX_SLEEP 50 +#else +#define MAX_SLEEP 30000 +#endif + +extern unsigned int yield_active; +extern unsigned int __thread rand_yield; + +static inline void debug_yield_read(void) +{ + if (yield_active & YIELD_READ) + if (rand_r(&rand_yield) & 0x1) + usleep(rand_r(&rand_yield) % MAX_SLEEP); +} + +static inline void debug_yield_write(void) +{ + if (yield_active & YIELD_WRITE) + if (rand_r(&rand_yield) & 0x1) + usleep(rand_r(&rand_yield) % MAX_SLEEP); +} + +static inline void debug_yield_init(void) +{ + rand_yield = time(NULL) ^ pthread_self(); +} +#else +static inline void debug_yield_read(void) +{ +} + +static inline void debug_yield_write(void) +{ +} + +static inline void debug_yield_init(void) +{ + +} +#endif + +#ifdef DEBUG_FULL_MB +static inline void reader_barrier() +{ + smp_mb(); +} +#else +static inline void reader_barrier() +{ + barrier(); +} +#endif + +/* + * The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a + * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits. + */ +#define RCU_GP_COUNT (1UL << 0) +/* Use the amount of bits equal to half of the architecture long size */ +#define RCU_GP_CTR_BIT (1UL << (sizeof(long) << 2)) +#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1) + +/* + * Global quiescent period counter with low-order bits unused. + * Using a int rather than a char to eliminate false register dependencies + * causing stalls on some architectures. + */ +extern long urcu_gp_ctr; + +extern long __thread urcu_active_readers; + +static inline int rcu_old_gp_ongoing(long *value) +{ + if (value == NULL) + return 0; + + return LOAD_SHARED(*value) & 1; +} + +static inline void _rcu_read_lock(void) +{ +} + +static inline void _rcu_read_unlock(void) +{ +} + +static inline void _rcu_quiescent_state(void) +{ + smp_mb(); + urcu_active_readers = ACCESS_ONCE(urcu_gp_ctr) + 1; + smp_mb(); +} + +static inline void _rcu_thread_offline(void) +{ + smp_mb(); + urcu_active_readers = ACCESS_ONCE(urcu_gp_ctr); +} + +static inline void _rcu_thread_online(void) +{ + urcu_active_readers = ACCESS_ONCE(urcu_gp_ctr) + 1; + smp_mb(); +} + +/** + * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure + * meant to be read by RCU read-side critical sections. Returns the assigned + * value. + * + * Documents which pointers will be dereferenced by RCU read-side critical + * sections and adds the required memory barriers on architectures requiring + * them. It also makes sure the compiler does not reorder code initializing the + * data structure before its publication. + * + * Should match rcu_dereference_pointer(). + */ + +#define _rcu_assign_pointer(p, v) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + wmb(); \ + STORE_SHARED(p, v); \ + }) + +/** + * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous + * pointer to the data structure, which can be safely freed after waiting for a + * quiescent state using synchronize_rcu(). + */ + +#define _rcu_xchg_pointer(p, v) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + wmb(); \ + xchg(p, v); \ + }) + +/* + * Exchanges the pointer and waits for quiescent state. + * The pointer returned can be freed. + */ +#define _rcu_publish_content(p, v) \ + ({ \ + void *oldptr; \ + oldptr = _rcu_xchg_pointer(p, v); \ + synchronize_rcu(); \ + oldptr; \ + }) + +extern void synchronize_rcu(void); + +/* + * Reader thread registration. + */ +extern void rcu_register_thread(void); +extern void rcu_unregister_thread(void); + +#endif /* _URCU_QSBR_H */ -- 2.34.1