From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Sun, 30 Aug 2009 14:50:04 +0000 (-0400)
Subject: Move test programs to tests/ subdir
X-Git-Tag: v0.1~101
X-Git-Url: http://git.liburcu.org/?p=urcu.git;a=commitdiff_plain;h=833dbdb633ba44bfb71b4b965a83096d8e4e827b

Move test programs to tests/ subdir

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---

diff --git a/Makefile b/Makefile
index 98f9c4a..ab19097 100644
--- a/Makefile
+++ b/Makefile
@@ -1,170 +1 @@
-
-CFLAGS=-Wall -O2 -g -I.
-LDFLAGS=-lpthread
-
-HOSTTYPE=$(shell uname -m)
-
-ifeq ("${HOSTTYPE}","x86_64")
-ARCHTYPE=x86
-endif
-ifeq ("${HOSTTYPE}","i586")
-ARCHTYPE=x86
-endif
-ifeq ("${HOSTTYPE}","i686")
-ARCHTYPE=x86
-endif
-ifeq ("${HOSTTYPE}","powerpc")
-ARCHTYPE=ppc
-endif
-ifeq ("${HOSTTYPE}","ppc64")
-ARCHTYPE=ppc
-endif
-ifeq ("${HOSTTYPE}","s390")
-ARCHTYPE=s390
-endif
-ifeq ("${HOSTTYPE}","s390x")
-ARCHTYPE=s390
-endif
-
-ifeq ($(findstring ${ARCHTYPE},"x86 ppc"),)
-APIHEADER=api_gcc.h
-else
-APIHEADER=api_${ARCHTYPE}.h
-endif
-
-#debug
-#CFLAGS=-Wall -g
-
-#Build the library without using signals. Slower read-side.
-#CFLAGS+=-DCONFIG_URCU_AVOID_SIGNALS
-
-#Changing the signal number used by the library. SIGUSR1 by default.
-#CFLAGS+=-DSIGURCU=SIGUSR2
-
-SRC_DEP=`echo $^ | sed 's/[^ ]*\.h//g'`
-
-all: checkarch test_urcu test_urcu_dynamic_link test_urcu_timing \
-	test_rwlock_timing test_rwlock test_perthreadlock_timing \
-	test_perthreadlock test_urcu_yield test_urcu_mb \
-	urcu-asm.S test_qsbr_timing test_qsbr urcu-asm.o urcutorture \
-	urcutorture-yield liburcu.so test_mutex test_looplen test_urcu_gc \
-	test_urcu_gc_mb test_qsbr_gc test_qsbr_lgc test_urcu_lgc \
-	test_urcu_lgc_mb
-
-checkarch:
-ifeq (${ARCHTYPE},)
-	@echo "Architecture ${HOSTTYPE} is currently unsupported by liburcu"
-	@exit 1
-endif
-
-arch.h: arch_${ARCHTYPE}.h
-	cp -f arch_${ARCHTYPE}.h arch.h
-
-api.h: ${APIHEADER}
-	cp -f ${APIHEADER} api.h
-
-arch_atomic.h: arch_atomic_${ARCHTYPE}.h
-	cp -f arch_atomic_${ARCHTYPE}.h arch_atomic.h
-
-urcu.h: arch.h arch_atomic.h
-
-urcu-qsbr.h: arch.h arch_atomic.h
-
-test_urcu: urcu.o test_urcu.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_looplen: test_looplen.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_mb: urcu-mb.o test_urcu.c urcu.h
-	$(CC) -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_gc: urcu.o test_urcu_gc.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_gc_mb: urcu-mb.o test_urcu_gc.c urcu.h
-	$(CC) -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_lgc: urcu.o test_urcu_gc.c urcu.h
-	$(CC) -DTEST_LOCAL_GC ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_lgc_mb: urcu-mb.o test_urcu_gc.c urcu.h
-	$(CC) -DTEST_LOCAL_GC -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr: urcu-qsbr.o test_qsbr.c urcu-qsbr.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr_gc: urcu-qsbr.o test_qsbr_gc.c urcu-qsbr.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr_lgc: urcu-qsbr.o test_qsbr_gc.c urcu-qsbr.h
-	$(CC) -DTEST_LOCAL_GC ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_rwlock: urcu.o test_rwlock.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_perthreadlock: urcu.o test_perthreadlock.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_mutex: urcu.o test_mutex.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_dynamic_link: urcu.o test_urcu.c urcu.h
-	$(CC) ${CFLAGS} -DDYNAMIC_LINK_TEST $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_yield: urcu-yield.o test_urcu.c urcu.h
-	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_timing: urcu.o test_urcu_timing.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr_timing: urcu-qsbr.o test_qsbr_timing.c urcu-qsbr.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_perthreadlock_timing: urcu.o test_perthreadlock_timing.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-urcu.o: urcu.c urcu.h
-	$(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-urcu-mb.o: urcu.c urcu.h
-	$(CC) -fPIC -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-urcu-qsbr.o: urcu-qsbr.c urcu-qsbr.h
-	$(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-liburcu.so: urcu.o
-	$(CC) -fPIC -shared -o $@ $<
-
-urcu-yield.o: urcu.c urcu.h
-	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-urcu-asm.S: urcu-asm.c urcu.h
-	$(CC) ${CFLAGS} -S -o $@ $(SRC_DEP)
-
-urcu-asm.o: urcu-asm.c urcu.h
-	$(CC) ${CFLAGS} -c -o $@ $(SRC_DEP)
-
-urcutorture: urcutorture.c urcu.o urcu.h rcutorture.h api.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-urcutorture-yield: urcutorture.c urcu-yield.o urcu.h rcutorture.h api.h
-	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-.PHONY: clean install checkarch
-
-install: liburcu.so
-	cp -f liburcu.so /usr/lib/
-	cp -f arch.h arch_atomic.h compiler.h urcu.h urcu-static.h /usr/include/
-
-clean:
-	rm -f *.o test_urcu test_urcu_dynamic_link test_urcu_timing \
-	test_rwlock_timing test_rwlock test_perthreadlock_timing \
-	test_perthreadlock test_urcu_yield test_urcu_mb \
-	urcu-asm.S test_qsbr_timing test_qsbr urcutorture \
-	urcutorture-yield liburcu.so api.h arch.h arch_atomic.h \
-	test_mutex test_urcu_gc test_urcu_gc_mb
-
+include Makefile.inc
diff --git a/Makefile.inc b/Makefile.inc
new file mode 100644
index 0000000..870bde8
--- /dev/null
+++ b/Makefile.inc
@@ -0,0 +1,62 @@
+
+include Makefile.build.inc
+
+# Directories that are built and cleaned recursively by the subdirs/clean rules.
+DIRS=tests
+
+all: checkarch liburcu.so urcu.o urcu-yield.o urcu-qsbr.o urcu-mb.o subdirs
+
+# Abort with a message when ARCHTYPE is empty (ARCHTYPE/HOSTTYPE are not set in
+# this file; presumably Makefile.build.inc defines them -- verify).
+checkarch:
+ifeq (${ARCHTYPE},)
+	@echo "Architecture ${HOSTTYPE} is currently unsupported by liburcu"
+	@exit 1
+endif
+
+# Install the architecture-specific headers under their generic names.
+arch.h: arch_${ARCHTYPE}.h
+	cp -f arch_${ARCHTYPE}.h arch.h
+
+arch_atomic.h: arch_atomic_${ARCHTYPE}.h
+	cp -f arch_atomic_${ARCHTYPE}.h arch_atomic.h
+
+urcu.h: arch.h arch_atomic.h
+
+urcu-qsbr.h: arch.h arch_atomic.h
+
+# urcu.o, urcu-mb.o and urcu-yield.o are all compiled from urcu.c and differ only
+# in their -D/-fPIC flags. SRC_DEP is presumably set by Makefile.build.inc.
+urcu.o: urcu.c urcu.h
+	$(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
+
+urcu-mb.o: urcu.c urcu.h
+	$(CC) -fPIC -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
+
+urcu-qsbr.o: urcu-qsbr.c urcu-qsbr.h
+	$(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
+
+# CFLAGS is passed to the link so that ABI flags appended by a wrapper makefile
+# (Makefile64 adds -m64) match the flags urcu.o was compiled with.
+liburcu.so: urcu.o
+	$(CC) ${CFLAGS} -fPIC -shared -o $@ $<
+
+urcu-yield.o: urcu.c urcu.h
+	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
+
+.PHONY: all clean install checkarch subdirs tests
+
+# Run make in every directory of DIRS. "-C" starts each sub-make from this
+# directory, so one iteration cannot affect the next and a missing directory
+# cannot make the sub-make recurse into this makefile. The leading "-" keeps a
+# failing sub-make from failing the top-level build.
+subdirs:
+	-for d in ${DIRS}; do ${MAKE} -C $${d} ${MFLAGS}; done
+
+install: liburcu.so
+	cp -f liburcu.so /usr/lib/
+	cp -f arch.h arch_atomic.h compiler.h urcu.h urcu-static.h /usr/include/
+
+clean:
+	rm -f *.o *.so arch.h arch_atomic.h
+	-for d in ${DIRS}; do ${MAKE} -C $${d} clean; done
diff --git a/Makefile64 b/Makefile64
index ba36fec..cca1cdc 100644
--- a/Makefile64
+++ b/Makefile64
@@ -1,153 +1,3 @@
+include Makefile.inc
 
-#CFLAGS=-m64 -Wall -O2 -g -I.
-CFLAGS=-m64 -O2 -g -I.
-LDFLAGS=-lpthread
-
-HOSTTYPE=$(shell uname -m)
-
-ifeq ("${HOSTTYPE}","x86_64")
-ARCHTYPE=x86
-endif
-ifeq ("${HOSTTYPE}","powerpc")
-ARCHTYPE=ppc
-endif
-ifeq ("${HOSTTYPE}","ppc64")
-ARCHTYPE=ppc
-endif
-
-#debug
-#CFLAGS=-m64 -Wall -g
-
-#Build the library without using signals. Slower read-side.
-#CFLAGS+=-DCONFIG_URCU_AVOID_SIGNALS
-
-#Changing the signal number used by the library. SIGUSR1 by default.
-#CFLAGS+=-DSIGURCU=SIGUSR2
-
-SRC_DEP=`echo $^ | sed 's/[^ ]*\.h//g'`
-
-all: checkarch test_urcu test_urcu_dynamic_link test_urcu_timing \
-	test_rwlock_timing test_rwlock test_perthreadlock_timing \
-	test_perthreadlock test_urcu_yield test_urcu_mb \
-	urcu-asm.S test_qsbr_timing test_qsbr urcu-asm.o urcutorture \
-	urcutorture-yield liburcu.so test_mutex test_looplen test_urcu_gc \
-	test_urcu_gc_mb test_qsbr_gc test_qsbr_lgc test_urcu_lgc \
-	test_urcu_lgc_mb
-
-checkarch:
-ifeq (${ARCHTYPE},)
-	@echo "Architecture ${HOSTTYPE} is currently unsupported by liburcu"
-	@exit 1
-endif
-
-arch.h: arch_${ARCHTYPE}.h
-	cp -f arch_${ARCHTYPE}.h arch.h
-
-api.h: api_${ARCHTYPE}.h
-	cp -f api_${ARCHTYPE}.h api.h
-
-arch_atomic.h: arch_atomic_${ARCHTYPE}.h
-	cp -f arch_atomic_${ARCHTYPE}.h arch_atomic.h
-
-urcu.h: arch.h api.h arch_atomic.h
-
-urcu-qsbr.h: arch.h api.h arch_atomic.h
-
-test_urcu: urcu.o test_urcu.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_looplen: test_looplen.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_mb: urcu-mb.o test_urcu.c urcu.h
-	$(CC) -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_gc: urcu.o test_urcu_gc.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_gc_mb: urcu-mb.o test_urcu_gc.c urcu.h
-	$(CC) -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_lgc: urcu.o test_urcu_gc.c urcu.h
-	$(CC) -DTEST_LOCAL_GC ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_lgc_mb: urcu-mb.o test_urcu_gc.c urcu.h
-	$(CC) -DTEST_LOCAL_GC -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr: urcu-qsbr.o test_qsbr.c urcu-qsbr.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr_gc: urcu-qsbr.o test_qsbr_gc.c urcu-qsbr.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr_lgc: urcu-qsbr.o test_qsbr_gc.c urcu-qsbr.h
-	$(CC) -DTEST_LOCAL_GC ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_rwlock: urcu.o test_rwlock.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_perthreadlock: urcu.o test_perthreadlock.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_mutex: urcu.o test_mutex.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_dynamic_link: urcu.o test_urcu.c urcu.h
-	$(CC) ${CFLAGS} -DDYNAMIC_LINK_TEST $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_yield: urcu-yield.o test_urcu.c urcu.h
-	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_urcu_timing: urcu.o test_urcu_timing.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_qsbr_timing: urcu-qsbr.o test_qsbr_timing.c urcu-qsbr.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_rwlock_timing: urcu.o test_rwlock_timing.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-test_perthreadlock_timing: urcu.o test_perthreadlock_timing.c urcu.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-urcu.o: urcu.c urcu.h
-	$(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-urcu-mb.o: urcu.c urcu.h
-	$(CC) -fPIC -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-urcu-qsbr.o: urcu-qsbr.c urcu-qsbr.h
-	$(CC) -fPIC ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-liburcu.so: urcu.o
-	$(CC) -m64 -fPIC -shared -o $@ $<
-
-urcu-yield.o: urcu.c urcu.h
-	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -c -o $@ $(SRC_DEP)
-
-urcu-asm.S: urcu-asm.c urcu.h
-	$(CC) ${CFLAGS} -S -o $@ $(SRC_DEP)
-
-urcu-asm.o: urcu-asm.c urcu.h
-	$(CC) ${CFLAGS} -c -o $@ $(SRC_DEP)
-
-urcutorture: urcutorture.c urcu.o urcu.h rcutorture.h
-	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-urcutorture-yield: urcutorture.c urcu-yield.o urcu.h rcutorture.h
-	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
-
-.PHONY: clean install checkarch
-
-install: liburcu.so
-	cp -f liburcu.so /usr/lib/
-	cp -f arch.h arch_atomic.h compiler.h urcu.h urcu-static.h /usr/include/
-
-clean:
-	rm -f *.o test_urcu test_urcu_dynamic_link test_urcu_timing \
-	test_rwlock_timing test_rwlock test_perthreadlock_timing \
-	test_perthreadlock test_urcu_yield test_urcu_mb \
-	urcu-asm.S test_qsbr_timing test_qsbr urcutorture \
-	urcutorture-yield liburcu.so api.h arch.h arch_atomic.h \
-	test_mutex test_urcu_gc test_urcu_gc_mb
-
+CFLAGS+=-m64
diff --git a/api_gcc.h b/api_gcc.h
deleted file mode 100644
index 1ad7345..0000000
--- a/api_gcc.h
+++ /dev/null
@@ -1,1328 +0,0 @@
-#define _INCLUDE_API_H
-
-/*
- * common.h: Common Linux kernel-isms.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; but version 2 of the License only due
- * to code included from the Linux kernel.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- *
- * Much code taken from the Linux kernel.  For such code, the option
- * to redistribute under later versions of GPL might not be available.
- */
-
-#ifndef __always_inline
-#define __always_inline inline
-#endif
-
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
-
-#ifdef __ASSEMBLY__
-#  define stringify_in_c(...)   __VA_ARGS__
-#  define ASM_CONST(x)          x
-#else
-/* This version of stringify will deal with commas... */
-#  define __stringify_in_c(...) #__VA_ARGS__
-#  define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
-#  define __ASM_CONST(x)        x##UL
-#  define ASM_CONST(x)          __ASM_CONST(x)
-#endif
-
-
-/*
- * arch-i386.h: Expose x86 atomic instructions.  80486 and better only.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, but version 2 only due to inclusion
- * of Linux-kernel code.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- *
- * Much code taken from the Linux kernel.  For such code, the option
- * to redistribute under later versions of GPL might not be available.
- */
-
-/*
- * Machine parameters.
- */
-
-#define CACHE_LINE_SIZE 64
-#define ____cacheline_internodealigned_in_smp \
-	__attribute__((__aligned__(1 << 6)))
-
-#define LOCK_PREFIX "lock ; "
-
-/*
- * Atomic data structure, initialization, and access.
- */
-
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i)  { (i) }
-
-#define atomic_read(v)		((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = (i))
-
-/*
- * Atomic operations.
- */
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- * 
- * Atomically adds @i to @v.
- */
-
-static __inline__ void atomic_add(int i, atomic_t *v)
-{
-	(void)__sync_fetch_and_add(&v->counter, i);
-}
-
-/**
- * atomic_sub - subtract the atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- * 
- * Atomically subtracts @i from @v.
- */
-static __inline__ void atomic_sub(int i, atomic_t *v)
-{
-	(void)__sync_fetch_and_add(&v->counter, -i);
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- * 
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
-{
-	return __sync_add_and_fetch(&v->counter, -i) == 0;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- * 
- * Atomically increments @v by 1.
- */ 
-static __inline__ void atomic_inc(atomic_t *v)
-{
-	(void)__sync_fetch_and_add(&v->counter, 1);
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- * 
- * Atomically decrements @v by 1.
- */ 
-static __inline__ void atomic_dec(atomic_t *v)
-{
-	(void)__sync_fetch_and_add(&v->counter, -1);
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- * 
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */ 
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
-	return __sync_add_and_fetch(&v->counter, -1) == 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test 
- * @v: pointer of type atomic_t
- * 
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */ 
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
-	return __sync_add_and_fetch(&v->counter, 1) == 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- * 
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */ 
-static __inline__ int atomic_add_negative(int i, atomic_t *v)
-{
-	return __sync_add_and_fetch(&v->counter, i) < 0;
-}
-
-/**
- * atomic_add_return - add and return
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static __inline__ int atomic_add_return(int i, atomic_t *v)
-{
-	return __sync_add_and_fetch(&v->counter, i);
-}
-
-static __inline__ int atomic_sub_return(int i, atomic_t *v)
-{
-	return atomic_add_return(-i,v);
-}
-
-static inline unsigned int
-cmpxchg(volatile long *ptr, long oldval, long newval)
-{
-	return __sync_val_compare_and_swap(ptr, oldval, newval);
-}
-
-#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-#define atomic_add_unless(v, a, u)				\
-({								\
-	int c, old;						\
-	c = atomic_read(v);					\
-	for (;;) {						\
-		if (unlikely(c == (u)))				\
-			break;					\
-		old = atomic_cmpxchg((v), c, c + (a));		\
-		if (likely(old == c))				\
-			break;					\
-		c = old;					\
-	}							\
-	c != (u);						\
-})
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-#define atomic_inc_return(v)  (atomic_add_return(1,v))
-#define atomic_dec_return(v)  (atomic_sub_return(1,v))
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
-/*
- * api_pthreads.h: API mapping to pthreads environment.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.  However, please note that much
- * of the code in this file derives from the Linux kernel, and that such
- * code may not be available except under GPLv2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#define __USE_GNU
-#include <pthread.h>
-#include <sched.h>
-#include <sys/param.h>
-/* #include "atomic.h" */
-
-/*
- * Compiler magic.
- */
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Default machine parameters.
- */
-
-#ifndef CACHE_LINE_SIZE
-#define CACHE_LINE_SIZE 128
-#endif /* #ifndef CACHE_LINE_SIZE */
-
-/*
- * Exclusive locking primitives.
- */
-
-typedef pthread_mutex_t spinlock_t;
-
-#define DEFINE_SPINLOCK(lock) spinlock_t lock = PTHREAD_MUTEX_INITIALIZER;
-#define __SPIN_LOCK_UNLOCKED(lockp) PTHREAD_MUTEX_INITIALIZER
-
-static void spin_lock_init(spinlock_t *sp)
-{
-	if (pthread_mutex_init(sp, NULL) != 0) {
-		perror("spin_lock_init:pthread_mutex_init");
-		exit(-1);
-	}
-}
-
-static void spin_lock(spinlock_t *sp)
-{
-	if (pthread_mutex_lock(sp) != 0) {
-		perror("spin_lock:pthread_mutex_lock");
-		exit(-1);
-	}
-}
-
-static void spin_unlock(spinlock_t *sp)
-{
-	if (pthread_mutex_unlock(sp) != 0) {
-		perror("spin_unlock:pthread_mutex_unlock");
-		exit(-1);
-	}
-}
-
-#define spin_lock_irqsave(l, f) do { f = 1; spin_lock(l); } while (0)
-#define spin_unlock_irqrestore(l, f) do { f = 0; spin_unlock(l); } while (0)
-
-/*
- * Thread creation/destruction primitives.
- */
-
-typedef pthread_t thread_id_t;
-
-#define NR_THREADS 128
-
-#define __THREAD_ID_MAP_EMPTY 0
-#define __THREAD_ID_MAP_WAITING 1
-thread_id_t __thread_id_map[NR_THREADS];
-spinlock_t __thread_id_map_mutex;
-
-#define for_each_thread(t) \
-	for (t = 0; t < NR_THREADS; t++)
-
-#define for_each_running_thread(t) \
-	for (t = 0; t < NR_THREADS; t++) \
-		if ((__thread_id_map[t] != __THREAD_ID_MAP_EMPTY) && \
-		    (__thread_id_map[t] != __THREAD_ID_MAP_WAITING))
-
-pthread_key_t thread_id_key;
-
-static int __smp_thread_id(void)
-{
-	int i;
-	thread_id_t tid = pthread_self();
-
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid) {
-			long v = i + 1;  /* must be non-NULL. */
-
-			if (pthread_setspecific(thread_id_key, (void *)v) != 0) {
-				perror("pthread_setspecific");
-				exit(-1);
-			}
-			return i;
-		}
-	}
-	spin_lock(&__thread_id_map_mutex);
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid)
-			spin_unlock(&__thread_id_map_mutex);
-			return i;
-	}
-	spin_unlock(&__thread_id_map_mutex);
-	fprintf(stderr, "smp_thread_id: Rogue thread, id: %d(%#x)\n",
-			(int)tid, (int)tid);
-	exit(-1);
-}
-
-static int smp_thread_id(void)
-{
-	void *id;
-
-	id = pthread_getspecific(thread_id_key);
-	if (id == NULL)
-		return __smp_thread_id();
-	return (long)(id - 1);
-}
-
-static thread_id_t create_thread(void *(*func)(void *), void *arg)
-{
-	thread_id_t tid;
-	int i;
-
-	spin_lock(&__thread_id_map_mutex);
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == __THREAD_ID_MAP_EMPTY)
-			break;
-	}
-	if (i >= NR_THREADS) {
-		spin_unlock(&__thread_id_map_mutex);
-		fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
-		exit(-1);
-	}
-	__thread_id_map[i] = __THREAD_ID_MAP_WAITING;
-	spin_unlock(&__thread_id_map_mutex);
-	if (pthread_create(&tid, NULL, func, arg) != 0) {
-		perror("create_thread:pthread_create");
-		exit(-1);
-	}
-	__thread_id_map[i] = tid;
-	return tid;
-}
-
-static void *wait_thread(thread_id_t tid)
-{
-	int i;
-	void *vp;
-
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid)
-			break;
-	}
-	if (i >= NR_THREADS){
-		fprintf(stderr, "wait_thread: bad tid = %d(%#x)\n",
-				(int)tid, (int)tid);
-		exit(-1);
-	}
-	if (pthread_join(tid, &vp) != 0) {
-		perror("wait_thread:pthread_join");
-		exit(-1);
-	}
-	__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
-	return vp;
-}
-
-static void wait_all_threads(void)
-{
-	int i;
-	thread_id_t tid;
-
-	for (i = 1; i < NR_THREADS; i++) {
-		tid = __thread_id_map[i];
-		if (tid != __THREAD_ID_MAP_EMPTY &&
-		    tid != __THREAD_ID_MAP_WAITING)
-			(void)wait_thread(tid);
-	}
-}
-
-static void run_on(int cpu)
-{
-	cpu_set_t mask;
-
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * timekeeping -- very crude -- should use MONOTONIC...
- */
-
-long long get_microseconds(void)
-{
-	struct timeval tv;
-
-	if (gettimeofday(&tv, NULL) != 0)
-		abort();
-	return ((long long)tv.tv_sec) * 1000000LL + (long long)tv.tv_usec;
-}
-
-/*
- * Per-thread variables.
- */
-
-#define DEFINE_PER_THREAD(type, name) \
-	struct { \
-		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
-	} __per_thread_##name[NR_THREADS];
-#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
-
-#define per_thread(name, thread) __per_thread_##name[thread].v
-#define __get_thread_var(name) per_thread(name, smp_thread_id())
-
-#define init_per_thread(name, v) \
-	do { \
-		int __i_p_t_i; \
-		for (__i_p_t_i = 0; __i_p_t_i < NR_THREADS; __i_p_t_i++) \
-			per_thread(name, __i_p_t_i) = v; \
-	} while (0)
-
-/*
- * CPU traversal primitives.
- */
-
-#ifndef NR_CPUS
-#define NR_CPUS 16
-#endif /* #ifndef NR_CPUS */
-
-#define for_each_possible_cpu(cpu) \
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-#define for_each_online_cpu(cpu) \
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-
-/*
- * Per-CPU variables.
- */
-
-#define DEFINE_PER_CPU(type, name) \
-	struct { \
-		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
-	} __per_cpu_##name[NR_CPUS]
-#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
-
-DEFINE_PER_THREAD(int, smp_processor_id);
-
-#define per_cpu(name, thread) __per_cpu_##name[thread].v
-#define __get_cpu_var(name) per_cpu(name, smp_processor_id())
-
-#define init_per_cpu(name, v) \
-	do { \
-		int __i_p_c_i; \
-		for (__i_p_c_i = 0; __i_p_c_i < NR_CPUS; __i_p_c_i++) \
-			per_cpu(name, __i_p_c_i) = v; \
-	} while (0)
-
-/*
- * CPU state checking (crowbarred).
- */
-
-#define idle_cpu(cpu) 0
-#define in_softirq() 1
-#define hardirq_count() 0
-#define PREEMPT_SHIFT   0
-#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define PREEMPT_BITS    8
-#define SOFTIRQ_BITS    8
-
-/*
- * CPU hotplug.
- */
-
-struct notifier_block {
-	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
-	struct notifier_block *next;
-	int priority;
-};
-
-#define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
-#define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
-#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
-#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
-#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
-#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-				        * not handling interrupts, soon dead */
-#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
-					* lock is dropped */
-
-/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
- * operation in progress
- */
-#define CPU_TASKS_FROZEN	0x0010
-
-#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)
-#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)
-#define CPU_UP_CANCELED_FROZEN	(CPU_UP_CANCELED | CPU_TASKS_FROZEN)
-#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
-#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
-#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
-
-/* Hibernation and suspend events */
-#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
-#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
-#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
-#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
-#define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
-#define PM_POST_RESTORE		0x0006 /* Restore failed */
-
-#define NOTIFY_DONE		0x0000		/* Don't care */
-#define NOTIFY_OK		0x0001		/* Suits me */
-#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
-						/* Bad/Veto action */
-/*
- * Clean way to return from the notifier and stop further calls.
- */
-#define NOTIFY_STOP		(NOTIFY_OK|NOTIFY_STOP_MASK)
-
-/*
- * Bug checks.
- */
-
-#define BUG_ON(c) do { if (!(c)) abort(); } while (0)
-
-/*
- * Initialization -- Must be called before calling any primitives.
- */
-
-static void smp_init(void)
-{
-	int i;
-
-	spin_lock_init(&__thread_id_map_mutex);
-	__thread_id_map[0] = pthread_self();
-	for (i = 1; i < NR_THREADS; i++)
-		__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
-	init_per_thread(smp_processor_id, 0);
-	if (pthread_key_create(&thread_id_key, NULL) != 0) {
-		perror("pthread_key_create");
-		exit(-1);
-	}
-}
-
-/* Taken from the Linux kernel source tree, so GPLv2-only!!! */
-
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-struct list_head {
-	struct list_head *next, *prev;
-};
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
-	struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void INIT_LIST_HEAD(struct list_head *list)
-{
-	list->next = list;
-	list->prev = list;
-}
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-#ifndef CONFIG_DEBUG_LIST
-static inline void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
-{
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	prev->next = new;
-}
-#else
-extern void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
-#endif
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head, head->next);
-}
-
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-	next->prev = prev;
-	prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty() on entry does not return true after this, the entry is
- * in an undefined state.
- */
-#ifndef CONFIG_DEBUG_LIST
-static inline void list_del(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
-}
-#else
-extern void list_del(struct list_head *entry);
-#endif
-
-/**
- * list_replace - replace old entry by new one
- * @old : the element to be replaced
- * @new : the new element to insert
- *
- * If @old was empty, it will be overwritten.
- */
-static inline void list_replace(struct list_head *old,
-				struct list_head *new)
-{
-	new->next = old->next;
-	new->next->prev = new;
-	new->prev = old->prev;
-	new->prev->next = new;
-}
-
-/**
- * list_replace_init - replace old entry by new one and reinitialize old
- * @old: the element to be replaced
- * @new: the new element to insert
- *
- * Calls list_replace() to put @new in the position of @old, then
- * INIT_LIST_HEAD() on @old so that @old points only at itself.
- */
-static inline void list_replace_init(struct list_head *old,
-					struct list_head *new)
-{
-	list_replace(old, new);
-	INIT_LIST_HEAD(old);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
-	__list_del(list->prev, list->next);
-	list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
-				  struct list_head *head)
-{
-	__list_del(list->prev, list->next);
-	list_add_tail(list, head);
-}
-
-/**
- * list_is_last - tests whether @list is the last entry in list @head
- * @list: the entry to test
- * @head: the head of the list
- */
-static inline int list_is_last(const struct list_head *list,
-				const struct list_head *head)
-{
-	return list->next == head;
-}
-
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(const struct list_head *head)
-{
-	return head->next == head;
-}
-
-/**
- * list_empty_careful - tests whether a list is empty and not being modified
- * @head: the list to test
- *
- * Description:
- * tests whether a list is empty _and_ checks that no other CPU might be
- * in the process of modifying either member (next or prev)
- *
- * NOTE: using list_empty_careful() without synchronization
- * can only be safe if the only activity that can happen
- * to the list entry is list_del_init(). Eg. it cannot be used
- * if another CPU could re-list_add() it.
- */
-static inline int list_empty_careful(const struct list_head *head)
-{
-	struct list_head *next = head->next;
-	return (next == head) && (next == head->prev);
-}
-
-/**
- * list_is_singular - tests whether a list has just one entry.
- * @head: the list to test.
- */
-static inline int list_is_singular(const struct list_head *head)
-{
-	return !list_empty(head) && (head->next == head->prev);
-}
-
-/*
- * Internal helper for list_cut_position(): moves the entries from
- * head->next up to and including @entry onto @list, and makes the entry
- * that followed @entry the new first entry of @head.
- *
- * No checks are done here; list_cut_position() filters the empty and
- * entry == head cases before calling this.
- */
-static inline void __list_cut_position(struct list_head *list,
-		struct list_head *head, struct list_head *entry)
-{
-	/* Saved first: entry->next is overwritten below. */
-	struct list_head *new_first = entry->next;
-	list->next = head->next;
-	list->next->prev = list;
-	list->prev = entry;
-	entry->next = list;
-	head->next = new_first;
-	new_first->prev = head;
-}
-
-/**
- * list_cut_position - cut a list into two
- * @list: a new list to add all removed entries
- * @head: a list with entries
- * @entry: an entry within head, could be the head itself
- *	and if so we won't cut the list
- *
- * This helper moves the initial part of @head, up to and
- * including @entry, from @head to @list. You should
- * pass on @entry an element you know is on @head. @list
- * should be an empty list or a list you do not care about
- * losing its data.
- *
- */
-static inline void list_cut_position(struct list_head *list,
-		struct list_head *head, struct list_head *entry)
-{
-	/* Nothing can be moved out of an empty list. */
-	if (list_empty(head))
-		return;
-
-	/* Single-entry list: @entry must be that entry or the head itself. */
-	if (list_is_singular(head) && head->next != entry && head != entry)
-		return;
-
-	if (entry != head) {
-		__list_cut_position(list, head, entry);
-		return;
-	}
-
-	/* Cutting at the head moves nothing; @list simply becomes empty. */
-	INIT_LIST_HEAD(list);
-}
-
-/*
- * Internal helper: link the chain running from list->next to list->prev
- * in between @prev and @next.
- *
- * @list itself is only read, never written. The callers in this file
- * check !list_empty(list) before calling.
- */
-static inline void __list_splice(const struct list_head *list,
-				 struct list_head *prev,
-				 struct list_head *next)
-{
-	struct list_head *first = list->next;
-	struct list_head *last = list->prev;
-
-	first->prev = prev;
-	prev->next = first;
-
-	last->next = next;
-	next->prev = last;
-}
-
-/**
- * list_splice - join two lists, this is designed for stacks
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(const struct list_head *list,
-				struct list_head *head)
-{
-	if (!list_empty(list))
-		__list_splice(list, head, head->next);
-}
-
-/**
- * list_splice_tail - join two lists, each list being a queue
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The entries of @list are linked in just before @head.  @list itself is
- * only read, so it is taken as a const pointer, as in list_splice().
- */
-static inline void list_splice_tail(const struct list_head *list,
-				struct list_head *head)
-{
-	if (!list_empty(list))
-		__list_splice(list, head->prev, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
-				    struct list_head *head)
-{
-	/* An empty @list has nothing to splice. */
-	if (list_empty(list))
-		return;
-
-	/* Link the entries right after @head, then reset the donor list. */
-	__list_splice(list, head, head->next);
-	INIT_LIST_HEAD(list);
-}
-
-/**
- * list_splice_tail_init - join two lists and reinitialise the emptied list
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * Each of the lists is a queue.
- * The list at @list is reinitialised
- */
-static inline void list_splice_tail_init(struct list_head *list,
-					 struct list_head *head)
-{
-	/* An empty @list has nothing to splice. */
-	if (list_empty(list))
-		return;
-
-	/* Link the entries right before @head, then reset the donor list. */
-	__list_splice(list, head->prev, head);
-	INIT_LIST_HEAD(list);
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr:	the &struct list_head pointer.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
-
-/**
- * list_first_entry - get the first element from a list
- * @ptr:	the list head to take the element from.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- *
- * Note, that list is expected to be not empty.
- */
-#define list_first_entry(ptr, type, member) \
-	list_entry((ptr)->next, type, member)
-
-/**
- * list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each(pos, head) \
-	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
-        	pos = pos->next)
-
-/**
- * __list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * This variant differs from list_for_each() in that it's the
- * simplest possible list iteration code, no prefetching is done.
- * Use this for code that knows the list to be very short (empty
- * or 1 entry) most of the time.
- */
-#define __list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); pos = pos->next)
-
-/**
- * list_for_each_prev	-	iterate over a list backwards
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each_prev(pos, head) \
-	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
-        	pos = pos->prev)
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
-	for (pos = (head)->next, n = pos->next; pos != (head); \
-		pos = n, n = pos->next)
-
-/**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- */
-#define list_for_each_prev_safe(pos, n, head) \
-	for (pos = (head)->prev, n = pos->prev; \
-	     prefetch(pos->prev), pos != (head); \
-	     pos = n, n = pos->prev)
-
-/**
- * list_for_each_entry	-	iterate over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member)				\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head); 	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_reverse - iterate backwards over list of given type.
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_reverse(pos, head, member)			\
-	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head); 	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
- * @pos:	the type * to use as a start point
- * @head:	the head of the list
- * @member:	the name of the list_struct within the struct.
- *
- * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
- */
-#define list_prepare_entry(pos, head, member) \
-	((pos) ? : list_entry(head, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue - continue iteration over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Continue to iterate over list of given type, continuing after
- * the current position.
- */
-#define list_for_each_entry_continue(pos, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue_reverse - iterate backwards from the given point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Start to iterate over list of given type backwards, continuing after
- * the current position.
- */
-#define list_for_each_entry_continue_reverse(pos, head, member)		\
-	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head);	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_for_each_entry_from - iterate over list of given type from the current point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing from current position.
- */
-#define list_for_each_entry_from(pos, head, member) 			\
-	for (; prefetch(pos->member.next), &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member),	\
-		n = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_continue - continue list iteration safe against removal
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing after current point,
- * safe against removal of list entry.
- */
-#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
-		n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_from - iterate over list from current point safe against removal
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type from current point, safe against
- * removal of list entry.
- */
-#define list_for_each_entry_safe_from(pos, n, head, member) 			\
-	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate backwards over list of given type, safe against removal
- * of list entry.
- */
-#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
-	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
-		n = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
-
-/*
- * Double linked lists with a single pointer list head.
- * Mostly useful for hash tables where the two pointer list head is
- * too wasteful.
- * You lose the ability to access the tail in O(1).
- */
-
-struct hlist_head {
-	struct hlist_node *first;
-};
-
-struct hlist_node {
-	struct hlist_node *next, **pprev;
-};
-
-#define HLIST_HEAD_INIT { .first = NULL }
-#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
-#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-/*
- * INIT_HLIST_NODE - put a node in the "unhashed" state
- * @h: the node to initialize
- *
- * Clears both links; hlist_unhashed() tests pprev for NULL.
- */
-static inline void INIT_HLIST_NODE(struct hlist_node *h)
-{
-	h->next = NULL;
-	h->pprev = NULL;
-}
-
-/*
- * hlist_unhashed - tests whether a node has a NULL pprev
- * @h: the node to test
- *
- * Returns non-zero when h->pprev is NULL, which is the state left by
- * INIT_HLIST_NODE().
- */
-static inline int hlist_unhashed(const struct hlist_node *h)
-{
-	return !h->pprev;
-}
-
-/*
- * hlist_empty - tests whether a hlist is empty
- * @h: the list head to test
- *
- * Returns non-zero when h->first is NULL.
- */
-static inline int hlist_empty(const struct hlist_head *h)
-{
-	return !h->first;
-}
-
-/*
- * Internal helper: unlink @n from its list.
- *
- * n->pprev points at the pointer that currently refers to @n (either the
- * head's first or the previous node's next); that pointer is redirected to
- * @n's successor. @n's own fields are left untouched.
- */
-static inline void __hlist_del(struct hlist_node *n)
-{
-	struct hlist_node *next = n->next;
-	struct hlist_node **pprev = n->pprev;
-	*pprev = next;
-	/* @n may have been the last node, in which case there is no successor. */
-	if (next)
-		next->pprev = pprev;
-}
-
-/*
- * hlist_del - unlink a node and poison its links
- * @n: the node to delete
- *
- * After this call n->next and n->pprev hold LIST_POISON1/LIST_POISON2,
- * so hlist_unhashed(n) does not return true.
- */
-static inline void hlist_del(struct hlist_node *n)
-{
-	__hlist_del(n);
-	n->next = LIST_POISON1;
-	n->pprev = LIST_POISON2;
-}
-
-/*
- * hlist_del_init - unlink a node and reinitialize it
- * @n: the node to delete
- *
- * Does nothing when @n is already unhashed (pprev == NULL); otherwise
- * unlinks @n and resets both of its links to NULL.
- */
-static inline void hlist_del_init(struct hlist_node *n)
-{
-	if (!hlist_unhashed(n)) {
-		__hlist_del(n);
-		INIT_HLIST_NODE(n);
-	}
-}
-
-/*
- * hlist_add_head - insert a node at the front of a hlist
- * @n: the node to insert
- * @h: the list head
- */
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
-	struct hlist_node *first = h->first;
-	n->next = first;
-	/* The old first node, if any, is now referenced through n->next. */
-	if (first)
-		first->pprev = &n->next;
-	h->first = n;
-	n->pprev = &h->first;
-}
-
-/*
- * hlist_add_before - insert @n immediately before @next
- * @n: the node to insert
- * @next: the node already on the list; next must be != NULL
- *
- * @n takes over @next's pprev, so whatever pointed at @next (the head's
- * first or the previous node's next) now points at @n.
- */
-static inline void hlist_add_before(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	n->pprev = next->pprev;
-	n->next = next;
-	next->pprev = &n->next;
-	*(n->pprev) = n;
-}
-
-/*
- * hlist_add_after - insert a node immediately after @n
- * @n: the node already on the list
- * @next: the node to insert (despite its name, this is the new node)
- *
- * @next is linked between @n and @n's former successor, if there was one.
- */
-static inline void hlist_add_after(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	next->next = n->next;
-	n->next = next;
-	next->pprev = &n->next;
-
-	/* Fix up the back-pointer of the former successor of @n, if any. */
-	if(next->next)
-		next->next->pprev  = &next->next;
-}
-
-/*
- * Move a list from one list head to another. Fixup the pprev
- * reference of the first entry if it exists.
- */
-static inline void hlist_move_list(struct hlist_head *old,
-				   struct hlist_head *new)
-{
-	new->first = old->first;
-	if (new->first)
-		new->first->pprev = &new->first;
-	old->first = NULL;
-}
-
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#define hlist_for_each(pos, head) \
-	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
-	     pos = pos->next)
-
-#define hlist_for_each_safe(pos, n, head) \
-	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
-	     pos = n)
-
-/**
- * hlist_for_each_entry	- iterate over list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry(tpos, pos, head, member)			 \
-	for (pos = (head)->first;					 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_continue(tpos, pos, member)		 \
-	for (pos = (pos)->next;						 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_from - iterate over a hlist continuing from current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_from(tpos, pos, member)			 \
-	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @n:		another &struct hlist_node to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
-	for (pos = (head)->first;					 \
-	     pos && ({ n = pos->next; 1; }) && 				 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = n)
-
-#endif
diff --git a/api_ppc.h b/api_ppc.h
deleted file mode 100644
index 8a03faa..0000000
--- a/api_ppc.h
+++ /dev/null
@@ -1,1699 +0,0 @@
-/* MECHANICALLY GENERATED, DO NOT EDIT!!! */
-
-#define _INCLUDE_API_H
-
-/*
- * common.h: Common Linux kernel-isms.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; but version 2 of the License only due
- * to code included from the Linux kernel.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- *
- * Much code taken from the Linux kernel.  For such code, the option
- * to redistribute under later versions of GPL might not be available.
- */
-
-#ifndef __always_inline
-#define __always_inline inline
-#endif
-
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
-
-#ifdef __ASSEMBLY__
-#  define stringify_in_c(...)   __VA_ARGS__
-#  define ASM_CONST(x)          x
-#else
-/* This version of stringify will deal with commas... */
-#  define __stringify_in_c(...) #__VA_ARGS__
-#  define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
-#  define __ASM_CONST(x)        x##UL
-#  define ASM_CONST(x)          __ASM_CONST(x)
-#endif
-
-
-/*
- * arch-ppc64.h: Expose PowerPC atomic instructions.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; but version 2 of the License only due
- * to code included from the Linux kernel.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- *
- * Much code taken from the Linux kernel.  For such code, the option
- * to redistribute under later versions of GPL might not be available.
- */
-
-/*
- * Machine parameters.
- */
-
-#define CONFIG_PPC64
-
-#define CACHE_LINE_SIZE 128
-#define ____cacheline_internodealigned_in_smp \
-	__attribute__((__aligned__(1 << 7)))
-
-/*
- * Atomic data structure, initialization, and access.
- */
-
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i)  { (i) }
-
-#define atomic_read(v)		((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = (i))
-
-/*
- * Atomic operations.
- */
-
-#define LWSYNC lwsync
-#define PPC405_ERR77(ra,rb)
-#ifdef CONFIG_SMP
-#  define LWSYNC_ON_SMP stringify_in_c(LWSYNC) "\n"
-#  define ISYNC_ON_SMP "\n\tisync\n"
-#else
-#  define LWSYNC_ON_SMP
-#  define ISYNC_ON_SMP
-#endif
-
-
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- */
-static __always_inline unsigned long
-__xchg_u32(volatile void *p, unsigned long val)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__(
-	LWSYNC_ON_SMP
-"1:	lwarx	%0,0,%2 \n"
-	PPC405_ERR77(0,%2)
-"	stwcx.	%3,0,%2 \n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
-	: "r" (p), "r" (val)
-	: "cc", "memory");
-
-	return prev;
-}
-
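-/*
- * Atomic exchange, local variant
- *
- * Changes the memory location '*p' to be val and returns
- * the previous value stored there.  Unlike __xchg_u32(), no
- * LWSYNC_ON_SMP/ISYNC_ON_SMP barriers are emitted around the
- * lwarx/stwcx. loop.
- */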
-static __always_inline unsigned long
-__xchg_u32_local(volatile void *p, unsigned long val)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2 \n"
-	PPC405_ERR77(0,%2)
-"	stwcx.	%3,0,%2 \n\
-	bne-	1b"
-	: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
-	: "r" (p), "r" (val)
-	: "cc", "memory");
-
-	return prev;
-}
-
-#ifdef CONFIG_PPC64
-static __always_inline unsigned long
-__xchg_u64(volatile void *p, unsigned long val)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__(
-	LWSYNC_ON_SMP
-"1:	ldarx	%0,0,%2 \n"
-	PPC405_ERR77(0,%2)
-"	stdcx.	%3,0,%2 \n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
-	: "r" (p), "r" (val)
-	: "cc", "memory");
-
-	return prev;
-}
-
-static __always_inline unsigned long
-__xchg_u64_local(volatile void *p, unsigned long val)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2 \n"
-	PPC405_ERR77(0,%2)
-"	stdcx.	%3,0,%2 \n\
-	bne-	1b"
-	: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
-	: "r" (p), "r" (val)
-	: "cc", "memory");
-
-	return prev;
-}
-#endif
-
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid xchg().
- */
-extern void __xchg_called_with_bad_pointer(void);
-
-/*
- * __xchg - size-dispatching atomic exchange
- * @ptr:  location to exchange
- * @x:    new value to store
- * @size: operand size in bytes, normally sizeof(*ptr) as passed by xchg()
- *
- * Returns the previous value via __xchg_u32() for 4-byte operands and,
- * when CONFIG_PPC64 is defined, via __xchg_u64() for 8-byte operands.
- * Any other size falls through to __xchg_called_with_bad_pointer(), which
- * is declared but intentionally not defined so that the link fails.
- */
-static __always_inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, unsigned int size)
-{
-	switch (size) {
-	case 4:
-		return __xchg_u32(ptr, x);
-#ifdef CONFIG_PPC64
-	case 8:
-		return __xchg_u64(ptr, x);
-#endif
-	}
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-
-static __always_inline unsigned long
-__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
-{
-	/* Dispatch on operand width; 8-byte operands only with CONFIG_PPC64. */
-	if (size == 4)
-		return __xchg_u32_local(ptr, x);
-#ifdef CONFIG_PPC64
-	if (size == 8)
-		return __xchg_u64_local(ptr, x);
-#endif
-	/* Unsupported width: reference the deliberately undefined symbol. */
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-#define xchg(ptr,x)							     \
-  ({									     \
-     __typeof__(*(ptr)) _x_ = (x);					     \
-     (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
-  })
-
-#define xchg_local(ptr,x)						     \
-  ({									     \
-     __typeof__(*(ptr)) _x_ = (x);					     \
-     (__typeof__(*(ptr))) __xchg_local((ptr),				     \
-     		(unsigned long)_x_, sizeof(*(ptr))); 			     \
-  })
-
-/*
- * Compare and exchange - if *p == old, set it to new,
- * and return the old value of *p.
- */
-#define __HAVE_ARCH_CMPXCHG	1
-
-/*
- * __cmpxchg_u32 - 32-bit compare and exchange
- * @p:   location to operate on
- * @old: value *p is expected to hold
- * @new: value to store when *p == old
- *
- * Loads *p with lwarx, compares it with @old (cmpw) and, if equal, tries
- * to store @new with stwcx., retrying from the load when the store fails.
- * If the comparison fails the store is skipped (branch to label 2).
- * Returns the value that was loaded from *p.
- *
- * LWSYNC_ON_SMP / ISYNC_ON_SMP expand to barriers only when CONFIG_SMP
- * is defined; otherwise they are empty.
- */
-static __always_inline unsigned long
-__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
-{
-	unsigned int prev;
-
-	__asm__ __volatile__ (
-	LWSYNC_ON_SMP
-"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
-	cmpw	0,%0,%3\n\
-	bne-	2f\n"
-	PPC405_ERR77(0,%2)
-"	stwcx.	%4,0,%2\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	"\n\
-2:"
-	: "=&r" (prev), "+m" (*p)
-	: "r" (p), "r" (old), "r" (new)
-	: "cc", "memory");
-
-	return prev;
-}
-
-static __always_inline unsigned long
-__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
-			unsigned long new)
-{
-	unsigned int prev;
-
-	__asm__ __volatile__ (
-"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
-	cmpw	0,%0,%3\n\
-	bne-	2f\n"
-	PPC405_ERR77(0,%2)
-"	stwcx.	%4,0,%2\n\
-	bne-	1b"
-	"\n\
-2:"
-	: "=&r" (prev), "+m" (*p)
-	: "r" (p), "r" (old), "r" (new)
-	: "cc", "memory");
-
-	return prev;
-}
-
-#ifdef CONFIG_PPC64
-static __always_inline unsigned long
-__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__ (
-	LWSYNC_ON_SMP
-"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
-	cmpd	0,%0,%3\n\
-	bne-	2f\n\
-	stdcx.	%4,0,%2\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	"\n\
-2:"
-	: "=&r" (prev), "+m" (*p)
-	: "r" (p), "r" (old), "r" (new)
-	: "cc", "memory");
-
-	return prev;
-}
-
-static __always_inline unsigned long
-__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
-			unsigned long new)
-{
-	unsigned long prev;
-
-	__asm__ __volatile__ (
-"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
-	cmpd	0,%0,%3\n\
-	bne-	2f\n\
-	stdcx.	%4,0,%2\n\
-	bne-	1b"
-	"\n\
-2:"
-	: "=&r" (prev), "+m" (*p)
-	: "r" (p), "r" (old), "r" (new)
-	: "cc", "memory");
-
-	return prev;
-}
-#endif
-
-/* This function doesn't exist, so you'll get a linker error
-   if something tries to do an invalid cmpxchg().  */
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static __always_inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
-	  unsigned int size)
-{
-	switch (size) {
-	case 4:
-		return __cmpxchg_u32(ptr, old, new);
-#ifdef CONFIG_PPC64
-	case 8:
-		return __cmpxchg_u64(ptr, old, new);
-#endif
-	}
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
-static __always_inline unsigned long
-__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
-	  unsigned int size)
-{
-	/* Dispatch on operand width; 8-byte operands only with CONFIG_PPC64. */
-	if (size == 4)
-		return __cmpxchg_u32_local(ptr, old, new);
-#ifdef CONFIG_PPC64
-	if (size == 8)
-		return __cmpxchg_u64_local(ptr, old, new);
-#endif
-	/* Unsupported width: reference the deliberately undefined symbol. */
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
-#define cmpxchg(ptr, o, n)						 \
-  ({									 \
-     __typeof__(*(ptr)) _o_ = (o);					 \
-     __typeof__(*(ptr)) _n_ = (n);					 \
-     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
-				    (unsigned long)_n_, sizeof(*(ptr))); \
-  })
-
-
-#define cmpxchg_local(ptr, o, n)					 \
-  ({									 \
-     __typeof__(*(ptr)) _o_ = (o);					 \
-     __typeof__(*(ptr)) _n_ = (n);					 \
-     (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_,	 \
-				    (unsigned long)_n_, sizeof(*(ptr))); \
-  })
-
-#ifdef CONFIG_PPC64
-/*
- * We handle most unaligned accesses in hardware. On the other hand 
- * unaligned DMA can be very expensive on some ppc64 IO chips (it does
- * powers of 2 writes until it reaches sufficient alignment).
- *
- * Based on this we disable the IP header alignment in network drivers.
- * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
- * cacheline alignment of buffers.
- */
-#define NET_IP_ALIGN	0
-#define NET_SKB_PAD	L1_CACHE_BYTES
-
-#define cmpxchg64(ptr, o, n)						\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
-  })
-#define cmpxchg64_local(ptr, o, n)					\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
-  })
-#endif
-
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic_add - add integer to atomic variable
- * @a: integer value to add
- * @v: pointer of type atomic_t
- * 
- * Atomically adds @a to @v.
- */
-static __inline__ void atomic_add(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	"1:	lwarx	%0,0,%3		# atomic_add\n\
-		add	%0,%2,%0 \n\
-		stwcx.	%0,0,%3 \n\
-		bne-	1b"
-		: "=&r" (t), "+m" (v->counter)
-		: "r" (a), "r" (&v->counter)
-		: "cc");
-}
-
-/**
- * atomic_sub - subtract integer from atomic variable
- * @a: integer value to subtract
- * @v: pointer of type atomic_t
- * 
- * Atomically subtracts @a from @v.
- */
-static __inline__ void atomic_sub(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	"1:	lwarx	%0,0,%3		# atomic_sub \n\
-		subf	%0,%2,%0 \n\
-		stwcx.	%0,0,%3 \n\
-		bne-	1b"
-		: "=&r" (t), "+m" (v->counter)
-		: "r" (a), "r" (&v->counter)
-		: "cc");
-}
-
-/**
- * atomic_sub_return - subtract and return
- * @a: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @a from @v and returns the new value of @v.
- * The lwsync/isync pair around the lwarx/stwcx. loop is emitted
- * unconditionally.
- */
-static __inline__ int atomic_sub_return(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-		"lwsync\n\
-	1:	lwarx	%0,0,%2		# atomic_sub_return\n\
-		subf	%0,%1,%0\n\
-		stwcx.	%0,0,%2 \n\
-		bne-	1b \n\
-		isync"
-		: "=&r" (t)
-		: "r" (a), "r" (&v->counter)
-		: "cc", "memory");
-
-	return t;
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @a: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @a from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_sub_and_test(int a, atomic_t *v)
-{
-	return atomic_sub_return(a, v) == 0;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- * 
- * Atomically increments @v by 1.
- */ 
-static __inline__ void atomic_inc(atomic_t *v)
-{
-	atomic_add(1, v);
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- * 
- * Atomically decrements @v by 1.
- */ 
-static __inline__ void atomic_dec(atomic_t *v)
-{
-	atomic_sub(1, v);
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- * 
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */ 
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
-	return atomic_sub_and_test(1, v);
-}
-
-/* atomic_add_return() is defined further down; declare it before use. */
-static __inline__ int atomic_add_return(int a, atomic_t *v);
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
-	/* Compare against zero: the raw new value is not the documented result. */
-	return atomic_add_return(1, v) == 0;
-}
-
-/**
- * atomic_add_return - add and return
- * @v: pointer of type atomic_t
- * @a: integer value to add
- *
- * Atomically adds @a to @v and returns the new value of @v (@a + @v)
- */
-static __inline__ int atomic_add_return(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-		"lwsync \n\
-	1:	lwarx	%0,0,%2		 # atomic_add_return \n\
-		add	%0,%1,%0 \n\
-		stwcx.	%0,0,%2 \n\
-		bne-	1b \n\
-		isync"
-		: "=&r" (t)
-		: "r" (a), "r" (&v->counter)
-		: "cc", "memory");
-
-	return t;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @a: integer value to add
- *
- * Atomically adds @a to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */ 
-static __inline__ int atomic_add_negative(int a, atomic_t *v)
-{
-	return atomic_add_return(a, v) < 0;
-}
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
-{
-	int t;
-
-	/*
-	 * The counter is loaded with lwarx (a word load), so it must be
-	 * compared against @u with the word compare cmpw, not the doubleword
-	 * compare cmpd.
-	 *
-	 * On the success path the trailing subf subtracts @a again, so t ends
-	 * up holding the value that was in @v before the add; on the early
-	 * exit to label 2, t still holds the loaded value, which equals @u.
-	 */
-	__asm__ __volatile__(
-		"lwsync \n\
-	1:	lwarx	%0,0,%1		# atomic_add_unless\n\
-		cmpw	0,%0,%3 \n\
-		beq-	2f \n\
-		add	%0,%2,%0 \n\
-		stwcx.	%0,0,%1 \n\
-		bne-	1b \n\
-		isync \n\
-		subf	%0,%2,%0 \n\
-	2:"
-		: "=&r" (t)
-		: "r" (&v->counter), "r" (a), "r" (u)
-		: "cc", "memory");
-
-	return t != u;
-}
-
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-#define atomic_inc_return(v)  (atomic_add_return(1,v))
-#define atomic_dec_return(v)  (atomic_sub_return(1,v))
-
-/* Barriers around atomic inc/dec: each one is mapped to a full smp_mb() here. */
-#define smp_mb__before_atomic_dec()	smp_mb()
-#define smp_mb__after_atomic_dec()	smp_mb()
-#define smp_mb__before_atomic_inc()	smp_mb()
-#define smp_mb__after_atomic_inc()	smp_mb()
-
-/*
- * api_pthreads.h: API mapping to pthreads environment.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.  However, please note that much
- * of the code in this file derives from the Linux kernel, and that such
- * code may not be available except under GPLv2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#define __USE_GNU
-#include <pthread.h>
-#include <sched.h>
-#include <sys/param.h>
-/* #include "atomic.h" */
-
-/*
- * Compiler magic.
- */
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Default machine parameters.
- */
-
-#ifndef CACHE_LINE_SIZE
-#define CACHE_LINE_SIZE 128
-#endif /* #ifndef CACHE_LINE_SIZE */
-
-/*
- * Exclusive locking primitives.
- */
-
-typedef pthread_mutex_t spinlock_t;
-
-#define DEFINE_SPINLOCK(lock) spinlock_t lock = PTHREAD_MUTEX_INITIALIZER;
-#define __SPIN_LOCK_UNLOCKED(lockp) PTHREAD_MUTEX_INITIALIZER
-
-/*
- * Initialize the lock @sp with default mutex attributes.
- * Prints a diagnostic and exits the process if initialization fails.
- */
-static void spin_lock_init(spinlock_t *sp)
-{
-	int ret = pthread_mutex_init(sp, NULL);
-
-	if (ret != 0) {
-		/* pthread calls return the error number; perror() reads errno. */
-		errno = ret;
-		perror("spin_lock_init:pthread_mutex_init");
-		exit(-1);
-	}
-}
-
-/*
- * Acquire the lock @sp.
- * Prints a diagnostic and exits the process if the mutex cannot be locked.
- */
-static void spin_lock(spinlock_t *sp)
-{
-	int ret = pthread_mutex_lock(sp);
-
-	if (ret != 0) {
-		/* pthread calls return the error number; perror() reads errno. */
-		errno = ret;
-		perror("spin_lock:pthread_mutex_lock");
-		exit(-1);
-	}
-}
-
-/*
- * Release the lock @sp.
- * Prints a diagnostic and exits the process if the mutex cannot be unlocked.
- */
-static void spin_unlock(spinlock_t *sp)
-{
-	int ret = pthread_mutex_unlock(sp);
-
-	if (ret != 0) {
-		/* pthread calls return the error number; perror() reads errno. */
-		errno = ret;
-		perror("spin_unlock:pthread_mutex_unlock");
-		exit(-1);
-	}
-}
-
-#define spin_lock_irqsave(l, f) do { f = 1; spin_lock(l); } while (0)
-#define spin_unlock_irqrestore(l, f) do { f = 0; spin_unlock(l); } while (0)
-
-/*
- * Thread creation/destruction primitives.
- */
-
-typedef pthread_t thread_id_t;
-
-#define NR_THREADS 128
-
-#define __THREAD_ID_MAP_EMPTY 0
-#define __THREAD_ID_MAP_WAITING 1
-thread_id_t __thread_id_map[NR_THREADS];
-spinlock_t __thread_id_map_mutex;
-
-#define for_each_thread(t) \
-	for (t = 0; t < NR_THREADS; t++)
-
-#define for_each_running_thread(t) \
-	for (t = 0; t < NR_THREADS; t++) \
-		if ((__thread_id_map[t] != __THREAD_ID_MAP_EMPTY) && \
-		    (__thread_id_map[t] != __THREAD_ID_MAP_WAITING))
-
-#define for_each_tid(t, tid) \
-	for (t = 0; t < NR_THREADS; t++) \
-		if ((((tid) = __thread_id_map[t]) != __THREAD_ID_MAP_EMPTY) && \
-		    ((tid) != __THREAD_ID_MAP_WAITING))
-
-pthread_key_t thread_id_key;
-
-/*
- * Slow path of smp_thread_id(): find the calling thread's index in
- * __thread_id_map by comparing each entry against pthread_self().
- *
- * The first scan runs without the map mutex; on a hit, index + 1 is stored
- * under thread_id_key so that later smp_thread_id() calls can skip the scan.
- * If that scan misses, the map is rescanned with __thread_id_map_mutex held.
- * A thread found in neither scan is reported as a rogue thread and the
- * process exits.
- */
-static int __smp_thread_id(void)
-{
-	int i;
-	int ret;
-	thread_id_t tid = pthread_self();
-
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid) {
-			long v = i + 1;  /* must be non-NULL. */
-
-			ret = pthread_setspecific(thread_id_key, (void *)v);
-			if (ret != 0) {
-				/* pthread calls return the error number; perror() reads errno. */
-				errno = ret;
-				perror("pthread_setspecific");
-				exit(-1);
-			}
-			return i;
-		}
-	}
-	spin_lock(&__thread_id_map_mutex);
-	for (i = 0; i < NR_THREADS; i++) {
-		/*
-		 * The braces matter: without them the return ran on the first
-		 * iteration regardless of the comparison, yielding index 0 and
-		 * usually leaving the mutex held.
-		 */
-		if (__thread_id_map[i] == tid) {
-			spin_unlock(&__thread_id_map_mutex);
-			return i;
-		}
-	}
-	spin_unlock(&__thread_id_map_mutex);
-	fprintf(stderr, "smp_thread_id: Rogue thread, id: %d(%#x)\n",
-			(int)tid, (int)tid);
-	exit(-1);
-}
-
-/*
- * Return the calling thread's index in __thread_id_map.
- *
- * The index is cached as (index + 1) under thread_id_key, so a NULL value
- * means "not cached yet" and the lookup falls back to __smp_thread_id().
- */
-static int smp_thread_id(void)
-{
-	void *cached = pthread_getspecific(thread_id_key);
-
-	if (cached != NULL)
-		return (long)cached - 1;	/* undo the +1 applied when caching */
-	return __smp_thread_id();
-}
-
-/*
- * Start a new thread running func(arg) and record it in __thread_id_map.
- *
- * Claims the first empty slot (exits if all NR_THREADS slots are in use),
- * marks it as waiting, creates the thread and then stores the new thread's
- * id in that slot.  Returns the id of the created thread; exits the process
- * if the thread cannot be created.
- */
-static thread_id_t create_thread(void *(*func)(void *), void *arg)
-{
-	thread_id_t tid;
-	int i;
-	int ret;
-
-	spin_lock(&__thread_id_map_mutex);
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == __THREAD_ID_MAP_EMPTY)
-			break;
-	}
-	if (i >= NR_THREADS) {
-		spin_unlock(&__thread_id_map_mutex);
-		fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
-		exit(-1);
-	}
-	__thread_id_map[i] = __THREAD_ID_MAP_WAITING;
-	ret = pthread_create(&tid, NULL, func, arg);
-	if (ret != 0) {
-		/* pthread calls return the error number; perror() reads errno. */
-		errno = ret;
-		perror("create_thread:pthread_create");
-		exit(-1);
-	}
-	/*
-	 * Publish the id before dropping the mutex.  A new thread that falls
-	 * back to the locked rescan in __smp_thread_id() then blocks until its
-	 * slot holds its id rather than the waiting placeholder.
-	 */
-	__thread_id_map[i] = tid;
-	spin_unlock(&__thread_id_map_mutex);
-	return tid;
-}
-
-/*
- * Join thread @tid and mark its __thread_id_map slot empty again.
- *
- * Exits the process if @tid is not present in the map or if pthread_join()
- * fails; otherwise returns the value pthread_join() stored for the thread.
- */
-static void *wait_thread(thread_id_t tid)
-{
-	int i;
-	int ret;
-	void *vp;
-
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid)
-			break;
-	}
-	if (i >= NR_THREADS){
-		fprintf(stderr, "wait_thread: bad tid = %d(%#x)\n",
-				(int)tid, (int)tid);
-		exit(-1);
-	}
-	ret = pthread_join(tid, &vp);
-	if (ret != 0) {
-		/* pthread calls return the error number; perror() reads errno. */
-		errno = ret;
-		perror("wait_thread:pthread_join");
-		exit(-1);
-	}
-	__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
-	return vp;
-}
-
-/*
- * Join every thread recorded in slots 1 .. NR_THREADS-1 of __thread_id_map.
- * Slot 0 is not visited; empty and still-waiting slots are skipped.
- */
-static void wait_all_threads(void)
-{
-	int slot;
-
-	for (slot = 1; slot < NR_THREADS; slot++) {
-		thread_id_t tid = __thread_id_map[slot];
-
-		if (tid == __THREAD_ID_MAP_EMPTY ||
-		    tid == __THREAD_ID_MAP_WAITING)
-			continue;
-		(void)wait_thread(tid);
-	}
-}
-
-/*
- * Request that the caller (pid argument 0) run only on CPU number @cpu.
- * The return value of sched_setaffinity() is not checked.
- */
-static void run_on(int cpu)
-{
-	cpu_set_t only_cpu;
-
-	CPU_ZERO(&only_cpu);
-	CPU_SET(cpu, &only_cpu);
-	sched_setaffinity(0, sizeof(only_cpu), &only_cpu);
-}
-
-/*
- * timekeeping -- very crude -- should use MONOTONIC...
- */
-
-/*
- * Return the time reported by gettimeofday() as a single count of
- * microseconds (seconds * 1000000 + microseconds).
- * Calls abort() if gettimeofday() fails.
- */
-long long get_microseconds(void)
-{
-	struct timeval now;
-	long long usecs;
-
-	if (gettimeofday(&now, NULL) != 0)
-		abort();
-	usecs = ((long long)now.tv_sec) * 1000000LL;
-	usecs += (long long)now.tv_usec;
-	return usecs;
-}
-
-/*
- * Per-thread variables.
- */
-
-#define DEFINE_PER_THREAD(type, name) \
-	struct { \
-		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
-	} __per_thread_##name[NR_THREADS];
-#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
-
-#define per_thread(name, thread) __per_thread_##name[thread].v
-#define __get_thread_var(name) per_thread(name, smp_thread_id())
-
-#define init_per_thread(name, v) \
-	do { \
-		int __i_p_t_i; \
-		for (__i_p_t_i = 0; __i_p_t_i < NR_THREADS; __i_p_t_i++) \
-			per_thread(name, __i_p_t_i) = v; \
-	} while (0)
-
-/*
- * CPU traversal primitives.
- */
-
-#ifndef NR_CPUS
-#define NR_CPUS 16
-#endif /* #ifndef NR_CPUS */
-
-#define for_each_possible_cpu(cpu) \
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-#define for_each_online_cpu(cpu) \
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-
-/*
- * Per-CPU variables.
- */
-
-#define DEFINE_PER_CPU(type, name) \
-	struct { \
-		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
-	} __per_cpu_##name[NR_CPUS]
-#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
-
-DEFINE_PER_THREAD(int, smp_processor_id);
-
-#define per_cpu(name, thread) __per_cpu_##name[thread].v
-#define __get_cpu_var(name) per_cpu(name, smp_processor_id())
-
-#define init_per_cpu(name, v) \
-	do { \
-		int __i_p_c_i; \
-		for (__i_p_c_i = 0; __i_p_c_i < NR_CPUS; __i_p_c_i++) \
-			per_cpu(name, __i_p_c_i) = v; \
-	} while (0)
-
-/*
- * CPU state checking (crowbarred).
- */
-
-#define idle_cpu(cpu) 0
-#define in_softirq() 1
-#define hardirq_count() 0
-#define PREEMPT_SHIFT   0
-#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define PREEMPT_BITS    8
-#define SOFTIRQ_BITS    8
-
-/*
- * CPU hotplug.
- */
-
-struct notifier_block {
-	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
-	struct notifier_block *next;
-	int priority;
-};
-
-#define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
-#define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
-#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
-#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
-#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
-#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-				        * not handling interrupts, soon dead */
-#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
-					* lock is dropped */
-
-/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
- * operation in progress
- */
-#define CPU_TASKS_FROZEN	0x0010
-
-#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)
-#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)
-#define CPU_UP_CANCELED_FROZEN	(CPU_UP_CANCELED | CPU_TASKS_FROZEN)
-#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
-#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
-#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
-
-/* Hibernation and suspend events */
-#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
-#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
-#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
-#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
-#define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
-#define PM_POST_RESTORE		0x0006 /* Restore failed */
-
-#define NOTIFY_DONE		0x0000		/* Don't care */
-#define NOTIFY_OK		0x0001		/* Suits me */
-#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
-						/* Bad/Veto action */
-/*
- * Clean way to return from the notifier and stop further calls.
- */
-#define NOTIFY_STOP		(NOTIFY_OK|NOTIFY_STOP_MASK)
-
-/*
- * Bug checks.
- */
-
-/*
- * BUG_ON(c): abort the program when condition @c is true, i.e. when the
- * "bug" condition holds.  The previous definition tested !(c) and therefore
- * aborted when the condition was false.
- * NOTE(review): confirm no caller relied on the old, inverted sense.
- */
-#define BUG_ON(c) do { if (c) abort(); } while (0)
-
-/*
- * Initialization -- Must be called before calling any primitives.
- */
-
-/*
- * One-time setup for the thread-id machinery.
- *
- * Initializes the map mutex, records the calling thread in slot 0 of
- * __thread_id_map, marks every other slot empty, zeroes the per-thread
- * smp_processor_id variable and creates thread_id_key.  Exits the process
- * if the key cannot be created.
- */
-static void smp_init(void)
-{
-	int i;
-	int ret;
-
-	spin_lock_init(&__thread_id_map_mutex);
-	__thread_id_map[0] = pthread_self();
-	for (i = 1; i < NR_THREADS; i++)
-		__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
-	init_per_thread(smp_processor_id, 0);
-	ret = pthread_key_create(&thread_id_key, NULL);
-	if (ret != 0) {
-		/* pthread calls return the error number; perror() reads errno. */
-		errno = ret;
-		perror("pthread_key_create");
-		exit(-1);
-	}
-}
-
-/* Taken from the Linux kernel source tree, so GPLv2-only!!! */
-
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-struct list_head {
-	struct list_head *next, *prev;
-};
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
-	struct list_head name = LIST_HEAD_INIT(name)
-
-static inline void INIT_LIST_HEAD(struct list_head *list)
-{
-	list->next = list;
-	list->prev = list;
-}
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-#ifndef CONFIG_DEBUG_LIST
-static inline void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
-{
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	prev->next = new;
-}
-#else
-extern void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
-#endif
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head, head->next);
-}
-
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
-	__list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-	next->prev = prev;
-	prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty() on entry does not return true after this, the entry is
- * in an undefined state.
- */
-#ifndef CONFIG_DEBUG_LIST
-static inline void list_del(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
-}
-#else
-extern void list_del(struct list_head *entry);
-#endif
-
-/**
- * list_replace - replace old entry by new one
- * @old : the element to be replaced
- * @new : the new element to insert
- *
- * If @old was empty, it will be overwritten.
- */
-static inline void list_replace(struct list_head *old,
-				struct list_head *new)
-{
-	new->next = old->next;
-	new->next->prev = new;
-	new->prev = old->prev;
-	new->prev->next = new;
-}
-
-static inline void list_replace_init(struct list_head *old,
-					struct list_head *new)
-{
-	list_replace(old, new);
-	INIT_LIST_HEAD(old);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
-	__list_del(list->prev, list->next);
-	list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
-				  struct list_head *head)
-{
-	__list_del(list->prev, list->next);
-	list_add_tail(list, head);
-}
-
-/**
- * list_is_last - tests whether @list is the last entry in list @head
- * @list: the entry to test
- * @head: the head of the list
- */
-static inline int list_is_last(const struct list_head *list,
-				const struct list_head *head)
-{
-	return list->next == head;
-}
-
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(const struct list_head *head)
-{
-	return head->next == head;
-}
-
-/**
- * list_empty_careful - tests whether a list is empty and not being modified
- * @head: the list to test
- *
- * Description:
- * tests whether a list is empty _and_ checks that no other CPU might be
- * in the process of modifying either member (next or prev)
- *
- * NOTE: using list_empty_careful() without synchronization
- * can only be safe if the only activity that can happen
- * to the list entry is list_del_init(). Eg. it cannot be used
- * if another CPU could re-list_add() it.
- */
-static inline int list_empty_careful(const struct list_head *head)
-{
-	struct list_head *next = head->next;
-	return (next == head) && (next == head->prev);
-}
-
-/**
- * list_is_singular - tests whether a list has just one entry.
- * @head: the list to test.
- */
-static inline int list_is_singular(const struct list_head *head)
-{
-	return !list_empty(head) && (head->next == head->prev);
-}
-
-static inline void __list_cut_position(struct list_head *list,
-		struct list_head *head, struct list_head *entry)
-{
-	struct list_head *new_first = entry->next;
-	list->next = head->next;
-	list->next->prev = list;
-	list->prev = entry;
-	entry->next = list;
-	head->next = new_first;
-	new_first->prev = head;
-}
-
-/**
- * list_cut_position - cut a list into two
- * @list: a new list to add all removed entries
- * @head: a list with entries
- * @entry: an entry within head, could be the head itself
- *	and if so we won't cut the list
- *
- * This helper moves the initial part of @head, up to and
- * including @entry, from @head to @list. You should
- * pass on @entry an element you know is on @head. @list
- * should be an empty list or a list you do not care about
- * losing its data.
- *
- */
-static inline void list_cut_position(struct list_head *list,
-		struct list_head *head, struct list_head *entry)
-{
-	if (list_empty(head))
-		return;
-	if (list_is_singular(head) &&
-		(head->next != entry && head != entry))
-		return;
-	if (entry == head)
-		INIT_LIST_HEAD(list);
-	else
-		__list_cut_position(list, head, entry);
-}
-
-static inline void __list_splice(const struct list_head *list,
-				 struct list_head *prev,
-				 struct list_head *next)
-{
-	struct list_head *first = list->next;
-	struct list_head *last = list->prev;
-
-	first->prev = prev;
-	prev->next = first;
-
-	last->next = next;
-	next->prev = last;
-}
-
-/**
- * list_splice - join two lists, this is designed for stacks
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(const struct list_head *list,
-				struct list_head *head)
-{
-	if (!list_empty(list))
-		__list_splice(list, head, head->next);
-}
-
-/**
- * list_splice_tail - join two lists, each list being a queue
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice_tail(struct list_head *list,
-				struct list_head *head)
-{
-	if (!list_empty(list))
-		__list_splice(list, head->prev, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
-				    struct list_head *head)
-{
-	if (!list_empty(list)) {
-		__list_splice(list, head, head->next);
-		INIT_LIST_HEAD(list);
-	}
-}
-
-/**
- * list_splice_tail_init - join two lists and reinitialise the emptied list
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * Each of the lists is a queue.
- * The list at @list is reinitialised
- */
-static inline void list_splice_tail_init(struct list_head *list,
-					 struct list_head *head)
-{
-	if (!list_empty(list)) {
-		__list_splice(list, head->prev, head);
-		INIT_LIST_HEAD(list);
-	}
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr:	the &struct list_head pointer.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
-
-/**
- * list_first_entry - get the first element from a list
- * @ptr:	the list head to take the element from.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- *
- * Note, that list is expected to be not empty.
- */
-#define list_first_entry(ptr, type, member) \
-	list_entry((ptr)->next, type, member)
-
-/**
- * list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each(pos, head) \
-	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
-        	pos = pos->next)
-
-/**
- * __list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * This variant differs from list_for_each() in that it's the
- * simplest possible list iteration code, no prefetching is done.
- * Use this for code that knows the list to be very short (empty
- * or 1 entry) most of the time.
- */
-#define __list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); pos = pos->next)
-
-/**
- * list_for_each_prev	-	iterate over a list backwards
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- */
-#define list_for_each_prev(pos, head) \
-	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
-        	pos = pos->prev)
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
-	for (pos = (head)->next, n = pos->next; pos != (head); \
-		pos = n, n = pos->next)
-
-/**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- */
-#define list_for_each_prev_safe(pos, n, head) \
-	for (pos = (head)->prev, n = pos->prev; \
-	     prefetch(pos->prev), pos != (head); \
-	     pos = n, n = pos->prev)
-
-/**
- * list_for_each_entry	-	iterate over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member)				\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head); 	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_reverse - iterate backwards over list of given type.
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_reverse(pos, head, member)			\
-	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head); 	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
- * @pos:	the type * to use as a start point
- * @head:	the head of the list
- * @member:	the name of the list_struct within the struct.
- *
- * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
- */
-#define list_prepare_entry(pos, head, member) \
-	((pos) ? : list_entry(head, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue - continue iteration over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Continue to iterate over list of given type, continuing after
- * the current position.
- */
-#define list_for_each_entry_continue(pos, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue_reverse - iterate backwards from the given point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Start to iterate over list of given type backwards, continuing after
- * the current position.
- */
-#define list_for_each_entry_continue_reverse(pos, head, member)		\
-	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head);	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_for_each_entry_from - iterate over list of given type from the current point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing from current position.
- */
-#define list_for_each_entry_from(pos, head, member) 			\
-	for (; prefetch(pos->member.next), &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member),	\
-		n = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_continue
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing after current point,
- * safe against removal of list entry.
- */
-#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
-		n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_from
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type from current point, safe against
- * removal of list entry.
- */
-#define list_for_each_entry_safe_from(pos, n, head, member) 			\
-	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_reverse
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate backwards over list of given type, safe against removal
- * of list entry.
- */
-#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
-	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
-		n = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
-
-/*
- * Double linked lists with a single pointer list head.
- * Mostly useful for hash tables where the two pointer list head is
- * too wasteful.
- * You lose the ability to access the tail in O(1).
- */
-
-struct hlist_head {
-	struct hlist_node *first;
-};
-
-struct hlist_node {
-	struct hlist_node *next, **pprev;
-};
-
-#define HLIST_HEAD_INIT { .first = NULL }
-#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
-#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-static inline void INIT_HLIST_NODE(struct hlist_node *h)
-{
-	h->next = NULL;
-	h->pprev = NULL;
-}
-
-static inline int hlist_unhashed(const struct hlist_node *h)
-{
-	return !h->pprev;
-}
-
-static inline int hlist_empty(const struct hlist_head *h)
-{
-	return !h->first;
-}
-
-static inline void __hlist_del(struct hlist_node *n)
-{
-	struct hlist_node *next = n->next;
-	struct hlist_node **pprev = n->pprev;
-	*pprev = next;
-	if (next)
-		next->pprev = pprev;
-}
-
-static inline void hlist_del(struct hlist_node *n)
-{
-	__hlist_del(n);
-	n->next = LIST_POISON1;
-	n->pprev = LIST_POISON2;
-}
-
-static inline void hlist_del_init(struct hlist_node *n)
-{
-	if (!hlist_unhashed(n)) {
-		__hlist_del(n);
-		INIT_HLIST_NODE(n);
-	}
-}
-
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
-	struct hlist_node *first = h->first;
-	n->next = first;
-	if (first)
-		first->pprev = &n->next;
-	h->first = n;
-	n->pprev = &h->first;
-}
-
-/* next must be != NULL */
-static inline void hlist_add_before(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	n->pprev = next->pprev;
-	n->next = next;
-	next->pprev = &n->next;
-	*(n->pprev) = n;
-}
-
-static inline void hlist_add_after(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	next->next = n->next;
-	n->next = next;
-	next->pprev = &n->next;
-
-	if(next->next)
-		next->next->pprev  = &next->next;
-}
-
-/*
- * Move a list from one list head to another. Fixup the pprev
- * reference of the first entry if it exists.
- */
-static inline void hlist_move_list(struct hlist_head *old,
-				   struct hlist_head *new)
-{
-	new->first = old->first;
-	if (new->first)
-		new->first->pprev = &new->first;
-	old->first = NULL;
-}
-
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#define hlist_for_each(pos, head) \
-	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
-	     pos = pos->next)
-
-#define hlist_for_each_safe(pos, n, head) \
-	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
-	     pos = n)
-
-/**
- * hlist_for_each_entry	- iterate over list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry(tpos, pos, head, member)			 \
-	for (pos = (head)->first;					 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_continue(tpos, pos, member)		 \
-	for (pos = (pos)->next;						 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_from - iterate over a hlist continuing from current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_from(tpos, pos, member)			 \
-	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @n:		another &struct hlist_node to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
-	for (pos = (head)->first;					 \
-	     pos && ({ n = pos->next; 1; }) && 				 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = n)
-
-#endif
diff --git a/api_x86.h b/api_x86.h
deleted file mode 100644
index f48fce9..0000000
--- a/api_x86.h
+++ /dev/null
@@ -1,1387 +0,0 @@
-/* MECHANICALLY GENERATED, DO NOT EDIT!!! */
-
-#define _INCLUDE_API_H
-
-/*
- * common.h: Common Linux kernel-isms.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; but version 2 of the License only due
- * to code included from the Linux kernel.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- *
- * Much code taken from the Linux kernel.  For such code, the option
- * to redistribute under later versions of GPL might not be available.
- */
-
-/* Fall back to plain "inline" when nothing has defined __always_inline yet. */
-#ifndef __always_inline
-#define __always_inline inline
-#endif
-
-/*
- * Compile-time assertions.  When the argument is non-zero the array type
- * gets size 1 - 2 = -1, which the compiler rejects.
- * BUILD_BUG_ON() is a void expression; BUILD_BUG_ON_ZERO() is an expression
- * that evaluates to 0 (sizeof(char[1]) - 1) when its argument is zero.
- */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
-
-/*
- * Helpers for text shared between assembly and C sources.
- * With __ASSEMBLY__ defined the arguments are passed through untouched.
- * Otherwise stringify_in_c() turns its arguments into a string literal
- * followed by " ", and ASM_CONST() pastes a UL suffix onto its argument.
- */
-#ifdef __ASSEMBLY__
-#  define stringify_in_c(...)   __VA_ARGS__
-#  define ASM_CONST(x)          x
-#else
-/* This version of stringify will deal with commas... */
-#  define __stringify_in_c(...) #__VA_ARGS__
-#  define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
-#  define __ASM_CONST(x)        x##UL
-#  define ASM_CONST(x)          __ASM_CONST(x)
-#endif
-
-
-/*
- * arch-i386.h: Expose x86 atomic instructions.  80486 and better only.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, but version 2 only due to inclusion
- * of Linux-kernel code.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- *
- * Much code taken from the Linux kernel.  For such code, the option
- * to redistribute under later versions of GPL might not be available.
- */
-
-/*
- * Machine parameters.
- */
-
-/* Cache-line size, in bytes, used for alignment on this architecture. */
-#define CACHE_LINE_SIZE 64
-/* Alignment attribute of 1 << 6 = 64 bytes, i.e. one CACHE_LINE_SIZE. */
-#define ____cacheline_internodealigned_in_smp \
-	__attribute__((__aligned__(1 << 6)))
-
-/* Prefix pasted in front of the read-modify-write instruction strings. */
-#define LOCK_PREFIX "lock ; "
-
-/*
- * Atomic data structure, initialization, and access.
- */
-
-/* Atomic counter type: a single volatile int wrapped in a struct. */
-typedef struct { volatile int counter; } atomic_t;
-
-/* Static initializer, e.g. atomic_t x = ATOMIC_INIT(0); */
-#define ATOMIC_INIT(i)  { (i) }
-
-/* Plain load and plain store of the counter; no lock prefix is involved. */
-#define atomic_read(v)		((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = (i))
-
-/*
- * Atomic operations.
- */
-
-/**
- * atomic_add - atomically add an integer to an atomic variable
- * @i: amount to add
- * @v: atomic_t to update
- *
- * Issues a locked "addl" on @v->counter.  Nothing is returned.
- */
-static __inline__ void atomic_add(int i, atomic_t *v)
-{
-	__asm__ __volatile__(LOCK_PREFIX "addl %[delta],%[ctr]"
-			     : [ctr] "+m" (v->counter)
-			     : [delta] "ir" (i));
-}
-
-/**
- * atomic_sub - atomically subtract an integer from an atomic variable
- * @i: amount to subtract
- * @v: atomic_t to update
- *
- * Issues a locked "subl" on @v->counter.  Nothing is returned.
- */
-static __inline__ void atomic_sub(int i, atomic_t *v)
-{
-	__asm__ __volatile__(LOCK_PREFIX "subl %[delta],%[ctr]"
-			     : [ctr] "+m" (v->counter)
-			     : [delta] "ir" (i));
-}
-
-/**
- * atomic_sub_and_test - subtract, then report whether the result is zero
- * @i: amount to subtract
- * @v: atomic_t to update
- *
- * Atomically subtracts @i from @v.  The zero flag left by the locked "subl"
- * is captured with "sete": the return value is non-zero when the new
- * counter value is 0 and zero otherwise.
- */
-static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
-{
-	unsigned char is_zero;
-
-	__asm__ __volatile__(LOCK_PREFIX "subl %[delta],%[ctr]; sete %[flag]"
-			     : [ctr] "+m" (v->counter), [flag] "=qm" (is_zero)
-			     : [delta] "ir" (i)
-			     : "memory");
-	return is_zero;
-}
-
-/**
- * atomic_inc - atomically add one to an atomic variable
- * @v: atomic_t to update
- *
- * Issues a locked "incl" on @v->counter.  Nothing is returned.
- */
-static __inline__ void atomic_inc(atomic_t *v)
-{
-	__asm__ __volatile__(LOCK_PREFIX "incl %[ctr]"
-			     : [ctr] "+m" (v->counter));
-}
-
-/**
- * atomic_dec - atomically subtract one from an atomic variable
- * @v: atomic_t to update
- *
- * Issues a locked "decl" on @v->counter.  Nothing is returned.
- */
-static __inline__ void atomic_dec(atomic_t *v)
-{
-	__asm__ __volatile__(LOCK_PREFIX "decl %[ctr]"
-			     : [ctr] "+m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement, then report whether the result is zero
- * @v: atomic_t to update
- *
- * Atomically subtracts 1 from @v.  Returns 1 when the new counter value
- * is 0 and 0 in every other case.
- */
-static __inline__ int atomic_dec_and_test(atomic_t *v)
-{
-	unsigned char is_zero;
-
-	__asm__ __volatile__(LOCK_PREFIX "decl %[ctr]; sete %[flag]"
-			     : [ctr] "+m" (v->counter), [flag] "=qm" (is_zero)
-			     : /* no inputs */
-			     : "memory");
-	return is_zero != 0;
-}
-
-/**
- * atomic_inc_and_test - increment, then report whether the result is zero
- * @v: atomic_t to update
- *
- * Atomically adds 1 to @v.  Returns 1 when the new counter value is 0
- * and 0 in every other case.
- */
-static __inline__ int atomic_inc_and_test(atomic_t *v)
-{
-	unsigned char is_zero;
-
-	__asm__ __volatile__(LOCK_PREFIX "incl %[ctr]; sete %[flag]"
-			     : [ctr] "+m" (v->counter), [flag] "=qm" (is_zero)
-			     : /* no inputs */
-			     : "memory");
-	return is_zero != 0;
-}
-
-/**
- * atomic_add_negative - add, then report whether the result is negative
- * @i: amount to add
- * @v: atomic_t to update
- *
- * Atomically adds @i to @v.  The sign flag left by the locked "addl" is
- * captured with "sets": the return value is non-zero when the new counter
- * value is negative and zero when it is greater than or equal to zero.
- */
-static __inline__ int atomic_add_negative(int i, atomic_t *v)
-{
-	unsigned char is_negative;
-
-	__asm__ __volatile__(LOCK_PREFIX "addl %[delta],%[ctr]; sets %[flag]"
-			     : [ctr] "+m" (v->counter), [flag] "=qm" (is_negative)
-			     : [delta] "ir" (i)
-			     : "memory");
-	return is_negative;
-}
-
-/**
- * atomic_add_return - add and return
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns @i + @v, i.e. the counter value
- * right after the addition.
- *
- * "xaddl" leaves the previous counter value in the register operand and
- * stores the sum in memory, so the counter must be a read-write ("+m")
- * operand.  The "memory" clobber keeps the compiler from caching other
- * memory values across the operation, matching the other value-returning
- * atomics in this file.
- */
-static __inline__ int atomic_add_return(int i, atomic_t *v)
-{
-	int __i;
-
-	__i = i;	/* remember the addend; xaddl overwrites i */
-	__asm__ __volatile__(
-		LOCK_PREFIX "xaddl %0, %1;"
-		:"+r"(i), "+m"(v->counter)
-		: : "memory");
-	return i + __i;	/* old value + addend = new value */
-}
-
-/**
- * atomic_sub_return - subtract and return
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Implemented as atomic_add_return() with the negated amount; returns
- * whatever that call returns.
- */
-static __inline__ int atomic_sub_return(int i, atomic_t *v)
-{
-	int negated = -i;
-
-	return atomic_add_return(negated, v);
-}
-
-/*
- * cmpxchg - atomic compare-and-exchange
- * @ptr:    location to operate on
- * @oldval: value expected to be found in *@ptr
- * @newval: value to store when *@ptr matches @oldval
- *
- * Returns what the accumulator holds after "lock; cmpxchgl": @oldval when
- * the exchange happened, otherwise the value that was found in *@ptr.
- *
- * The statement is volatile and clobbers "memory" so the compiler cannot
- * merge, hoist or drop it, nor cache other memory values across it; a
- * caller's retry loop depends on every call really executing.
- *
- * NOTE(review): the instruction is the 32-bit "cmpxchgl" while the operands
- * are declared long -- confirm operand width on targets where long is
- * wider than 32 bits.
- */
-static inline unsigned int
-cmpxchg(volatile long *ptr, long oldval, long newval)
-{
-	unsigned long retval;
-
-	__asm__ __volatile__("# cmpxchg\n"
-	    "lock; cmpxchgl %4,(%2)\n"
-	    "# end atomic_cmpxchg4"
-	    : "=a" (retval), "=m" (*ptr)
-	    : "r" (ptr), "0" (oldval), "r" (newval), "m" (*ptr)
-	    : "cc", "memory");
-	return (retval);
-}
-
-/*
- * atomic_t wrappers: both operate on the address of (v)->counter.
- * atomic_cmpxchg() casts cmpxchg()'s result to int.
- * NOTE(review): (v)->counter is a volatile int while cmpxchg() takes a
- * volatile long * -- confirm the pointer types agree on the build target.
- * NOTE(review): no xchg() definition is visible in this part of the file;
- * atomic_xchg() presumably relies on one provided elsewhere.
- */
-#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- *
- * Implemented as a compare-and-swap retry loop: the counter is read, and
- * atomic_cmpxchg() is retried with the freshly observed value until either
- * the swap succeeds or the observed value equals @u.
- *
- * The expansion declares locals named "c" and "old", so argument
- * expressions that mention variables with those names will be captured.
- * Needs likely()/unlikely() and GCC statement expressions.
- *
- * atomic_inc_not_zero(v) is the special case "add 1 unless the value is 0".
- */
-#define atomic_add_unless(v, a, u)				\
-({								\
-	int c, old;						\
-	c = atomic_read(v);					\
-	for (;;) {						\
-		if (unlikely(c == (u)))				\
-			break;					\
-		old = atomic_cmpxchg((v), c, c + (a));		\
-		if (likely(old == c))				\
-			break;					\
-		c = old;					\
-	}							\
-	c != (u);						\
-})
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-/* Add or subtract 1 and yield what atomic_add_return()/atomic_sub_return() return. */
-#define atomic_inc_return(v)  (atomic_add_return(1,v))
-#define atomic_dec_return(v)  (atomic_sub_return(1,v))
-
-/* These are x86-specific, used by some header files */
-
-/*
- * atomic_clear_mask - atomically clear the bits of @mask in *@addr
- * atomic_set_mask   - atomically set the bits of @mask in *@addr
- *
- * Both issue one locked instruction ("andl" with the complemented mask,
- * "orl" with the mask) and clobber "memory".  @addr is parenthesized before
- * it is dereferenced so that pointer expressions such as "p + 1" expand
- * correctly in both macros.
- */
-#define atomic_clear_mask(mask, addr) \
-__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \
-: : "r" (~(mask)),"m" (*(addr)) : "memory")
-
-#define atomic_set_mask(mask, addr) \
-__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
-: : "r" (mask),"m" (*(addr)) : "memory")
-
-/*
- * Atomic operations are already serializing on x86, so each of these
- * expands to barrier() only (barrier() is not defined in the visible part
- * of this file and must be supplied by the includer).
- */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
-/*
- * api_pthreads.h: API mapping to pthreads environment.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.  However, please note that much
- * of the code in this file derives from the Linux kernel, and that such
- * code may not be available except under GPLv2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2006 Paul E. McKenney, IBM.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#define __USE_GNU
-#include <pthread.h>
-#include <sched.h>
-#include <sys/param.h>
-#include <sys/time.h>
-/* #include "atomic.h" */
-
-/*
- * Compiler magic.
- *
- * offsetof(): byte offset of MEMBER inside TYPE, computed by taking the
- * member's address in a TYPE imagined at address 0.
- * container_of(): given @ptr to the @member embedded in a @type, yield a
- * pointer to the enclosing @type.  The typed temporary __mptr makes the
- * compiler check @ptr against the member's type.  Relies on GCC statement
- * expressions and typeof.
- */
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Default machine parameters.
- */
-
-/*
- * Used only when no earlier definition exists; the x86 section earlier in
- * this file already defines CACHE_LINE_SIZE as 64, so 128 does not apply
- * there.
- */
-#ifndef CACHE_LINE_SIZE
-#define CACHE_LINE_SIZE 128
-#endif /* #ifndef CACHE_LINE_SIZE */
-
-/*
- * Exclusive locking primitives.
- */
-
-/* "Spinlocks" in this environment are ordinary pthread mutexes. */
-typedef pthread_mutex_t spinlock_t;
-
-/*
- * DEFINE_SPINLOCK() defines and statically initializes a lock; note that
- * the expansion already ends in ';'.  __SPIN_LOCK_UNLOCKED() ignores its
- * argument and yields the static mutex initializer.
- */
-#define DEFINE_SPINLOCK(lock) spinlock_t lock = PTHREAD_MUTEX_INITIALIZER;
-#define __SPIN_LOCK_UNLOCKED(lockp) PTHREAD_MUTEX_INITIALIZER
-
-/*
- * Initialize the lock @sp with default mutex attributes.
- * On failure a diagnostic is printed and the process exits with -1.
- */
-static void spin_lock_init(spinlock_t *sp)
-{
-	int ret = pthread_mutex_init(sp, NULL);
-
-	if (ret != 0) {
-		/* pthread calls return the error code; they do not set errno */
-		errno = ret;
-		perror("spin_lock_init:pthread_mutex_init");
-		exit(-1);
-	}
-}
-
-/*
- * Acquire the lock @sp, blocking until it is available.
- * On failure a diagnostic is printed and the process exits with -1.
- */
-static void spin_lock(spinlock_t *sp)
-{
-	int ret = pthread_mutex_lock(sp);
-
-	if (ret != 0) {
-		/* pthread calls return the error code; they do not set errno */
-		errno = ret;
-		perror("spin_lock:pthread_mutex_lock");
-		exit(-1);
-	}
-}
-
-/*
- * Release the lock @sp.
- * On failure a diagnostic is printed and the process exits with -1.
- */
-static void spin_unlock(spinlock_t *sp)
-{
-	int ret = pthread_mutex_unlock(sp);
-
-	if (ret != 0) {
-		/* pthread calls return the error code; they do not set errno */
-		errno = ret;
-		perror("spin_unlock:pthread_mutex_unlock");
-		exit(-1);
-	}
-}
-
-/*
- * irqsave/irqrestore look-alikes: there are no interrupts to mask here, so
- * the flags variable @f is merely set to 1 on lock and 0 on unlock around
- * the plain spin_lock()/spin_unlock() calls.
- */
-#define spin_lock_irqsave(l, f) do { f = 1; spin_lock(l); } while (0)
-#define spin_unlock_irqrestore(l, f) do { f = 0; spin_unlock(l); } while (0)
-
-/*
- * Thread creation/destruction primitives.
- */
-
-/* Thread handle type used by create_thread()/wait_thread(). */
-typedef pthread_t thread_id_t;
-
-/* Number of slots in __thread_id_map, and therefore the thread limit. */
-#define NR_THREADS 128
-
-/* Reserved slot values: free slot, and slot claimed but tid not yet stored. */
-#define __THREAD_ID_MAP_EMPTY 0
-#define __THREAD_ID_MAP_WAITING 1
-/*
- * Slot table mapping small integer thread indexes to pthread ids, and the
- * mutex create_thread() holds while it picks a free slot.
- * NOTE(review): comparing slots against 0 and 1 assumes pthread_t is an
- * arithmetic type whose valid values are never 0 or 1 -- confirm.
- */
-thread_id_t __thread_id_map[NR_THREADS];
-spinlock_t __thread_id_map_mutex;
-
-/* Visit every slot index, occupied or not. */
-#define for_each_thread(t) \
-	for (t = 0; t < NR_THREADS; t++)
-
-/*
- * Visit only slots that hold a real thread id.  The expansion ends in a
- * bare "if", so an "else" written right after the loop body attaches to it.
- */
-#define for_each_running_thread(t) \
-	for (t = 0; t < NR_THREADS; t++) \
-		if ((__thread_id_map[t] != __THREAD_ID_MAP_EMPTY) && \
-		    (__thread_id_map[t] != __THREAD_ID_MAP_WAITING))
-
-/* Thread-specific key in which __smp_thread_id() caches (slot index + 1). */
-pthread_key_t thread_id_key;
-
-/*
- * Slow path of smp_thread_id(): locate the calling thread's slot in
- * __thread_id_map by comparing each entry with pthread_self().
- *
- * A first, unlocked scan caches a hit in thread_id_key as (index + 1), so
- * that the stored value is never NULL.  If that scan misses, the table is
- * searched once more with __thread_id_map_mutex held.  A thread found in
- * neither pass is reported as a rogue thread and the process exits.
- *
- * Returns the zero-based slot index of the calling thread.
- */
-static int __smp_thread_id(void)
-{
-	int i;
-	int ret;
-	thread_id_t tid = pthread_self();
-
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid) {
-			long v = i + 1;  /* must be non-NULL. */
-
-			ret = pthread_setspecific(thread_id_key, (void *)v);
-			if (ret != 0) {
-				/* pthread calls return the error code */
-				errno = ret;
-				perror("pthread_setspecific");
-				exit(-1);
-			}
-			return i;
-		}
-	}
-	spin_lock(&__thread_id_map_mutex);
-	for (i = 0; i < NR_THREADS; i++) {
-		/*
-		 * Braces are essential: unlock and return only on a match,
-		 * otherwise keep scanning with the mutex still held.
-		 */
-		if (__thread_id_map[i] == tid) {
-			spin_unlock(&__thread_id_map_mutex);
-			return i;
-		}
-	}
-	spin_unlock(&__thread_id_map_mutex);
-	fprintf(stderr, "smp_thread_id: Rogue thread, id: %d(%#x)\n",
-			(int)tid, (int)tid);
-	exit(-1);
-}
-
-/*
- * Return the calling thread's slot index.  thread_id_key holds the index
- * plus one once it has been cached; a NULL value means nothing is cached
- * yet and the table search in __smp_thread_id() is used instead.
- */
-static int smp_thread_id(void)
-{
-	void *cached = pthread_getspecific(thread_id_key);
-
-	if (cached != NULL)
-		return (long)(cached - 1);
-	return __smp_thread_id();
-}
-
-/*
- * Start a new thread running @func(@arg) and register it in
- * __thread_id_map.
- *
- * A free slot is claimed under __thread_id_map_mutex by marking it
- * __THREAD_ID_MAP_WAITING; the real thread id replaces that marker once
- * pthread_create() has returned.  The process exits with -1 when all
- * NR_THREADS slots are taken or when thread creation fails.
- *
- * NOTE(review): the slot is filled in after pthread_create() and without
- * the mutex, so the new thread can run before its id is visible in the
- * table -- confirm callers tolerate that window.
- *
- * Returns the id of the new thread.
- */
-static thread_id_t create_thread(void *(*func)(void *), void *arg)
-{
-	thread_id_t tid;
-	int i;
-	int ret;
-
-	spin_lock(&__thread_id_map_mutex);
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == __THREAD_ID_MAP_EMPTY)
-			break;
-	}
-	if (i >= NR_THREADS) {
-		spin_unlock(&__thread_id_map_mutex);
-		fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
-		exit(-1);
-	}
-	__thread_id_map[i] = __THREAD_ID_MAP_WAITING;
-	spin_unlock(&__thread_id_map_mutex);
-	ret = pthread_create(&tid, NULL, func, arg);
-	if (ret != 0) {
-		/* pthread calls return the error code; they do not set errno */
-		errno = ret;
-		perror("create_thread:pthread_create");
-		exit(-1);
-	}
-	__thread_id_map[i] = tid;
-	return tid;
-}
-
-/*
- * Join the thread @tid and release its slot in __thread_id_map.
- *
- * The process exits with -1 when @tid is not present in the table or when
- * pthread_join() fails.
- *
- * Returns the value the joined thread handed back through pthread_join().
- */
-static void *wait_thread(thread_id_t tid)
-{
-	int i;
-	int ret;
-	void *vp;
-
-	for (i = 0; i < NR_THREADS; i++) {
-		if (__thread_id_map[i] == tid)
-			break;
-	}
-	if (i >= NR_THREADS){
-		fprintf(stderr, "wait_thread: bad tid = %d(%#x)\n",
-				(int)tid, (int)tid);
-		exit(-1);
-	}
-	ret = pthread_join(tid, &vp);
-	if (ret != 0) {
-		/* pthread calls return the error code; they do not set errno */
-		errno = ret;
-		perror("wait_thread:pthread_join");
-		exit(-1);
-	}
-	__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
-	return vp;
-}
-
-/*
- * Join every thread currently registered in slots 1 .. NR_THREADS - 1 of
- * __thread_id_map.  Slot 0 is not visited.  Slots that are empty or still
- * marked as waiting are passed over; return values are discarded.
- */
-static void wait_all_threads(void)
-{
-	int slot;
-
-	for (slot = 1; slot < NR_THREADS; slot++) {
-		thread_id_t tid = __thread_id_map[slot];
-
-		if (tid == __THREAD_ID_MAP_EMPTY ||
-		    tid == __THREAD_ID_MAP_WAITING)
-			continue;
-		(void)wait_thread(tid);
-	}
-}
-
-/*
- * Request that the caller run on CPU @cpu only, by passing a one-CPU
- * affinity mask to sched_setaffinity() with pid 0.  The call's result is
- * not checked.
- */
-static void run_on(int cpu)
-{
-	cpu_set_t only_cpu;
-
-	CPU_ZERO(&only_cpu);
-	CPU_SET(cpu, &only_cpu);
-	sched_setaffinity(0, sizeof(only_cpu), &only_cpu);
-}
-
-/*
- * timekeeping -- very crude -- should use MONOTONIC...
- */
-
-/*
- * Return the current gettimeofday() time as a count of microseconds
- * (seconds * 1000000 + microseconds).  Aborts if gettimeofday() fails.
- *
- * Declared static like every other function defined in this header, so
- * that including the header from more than one translation unit does not
- * produce duplicate external definitions.
- */
-static long long get_microseconds(void)
-{
-	struct timeval tv;
-
-	if (gettimeofday(&tv, NULL) != 0)
-		abort();
-	return ((long long)tv.tv_sec) * 1000000LL + (long long)tv.tv_usec;
-}
-
-/*
- * Per-thread variables.
- */
-
-/*
- * DEFINE_PER_THREAD(type, name) emits an array of NR_THREADS structs, each
- * wrapping one @type value aligned to CACHE_LINE_SIZE.  The expansion
- * already ends in ';'.  DECLARE_PER_THREAD() is the same text prefixed
- * with "extern".
- */
-#define DEFINE_PER_THREAD(type, name) \
-	struct { \
-		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
-	} __per_thread_##name[NR_THREADS];
-#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
-
-/*
- * per_thread(): the value stored in slot @thread.
- * __get_thread_var(): the value in the slot chosen by smp_thread_id().
- */
-#define per_thread(name, thread) __per_thread_##name[thread].v
-#define __get_thread_var(name) per_thread(name, smp_thread_id())
-
-/* Store @v into every one of the NR_THREADS slots of @name. */
-#define init_per_thread(name, v) \
-	do { \
-		int __i_p_t_i; \
-		for (__i_p_t_i = 0; __i_p_t_i < NR_THREADS; __i_p_t_i++) \
-			per_thread(name, __i_p_t_i) = v; \
-	} while (0)
-
-/*
- * CPU traversal primitives.
- */
-
-/* Number of CPU slots; may be overridden by defining NR_CPUS beforehand. */
-#ifndef NR_CPUS
-#define NR_CPUS 16
-#endif /* #ifndef NR_CPUS */
-
-/*
- * Both iterators walk every index from 0 to NR_CPUS - 1; no distinction
- * between possible and online CPUs is made here.
- */
-#define for_each_possible_cpu(cpu) \
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-#define for_each_online_cpu(cpu) \
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-
-/*
- * Per-CPU variables.
- */
-
-/*
- * DEFINE_PER_CPU(type, name) emits an array of NR_CPUS structs, each
- * wrapping one @type value aligned to CACHE_LINE_SIZE.  Unlike
- * DEFINE_PER_THREAD(), the expansion does not end in ';'.
- * DECLARE_PER_CPU() is the same text prefixed with "extern".
- */
-#define DEFINE_PER_CPU(type, name) \
-	struct { \
-		__typeof__(type) v \
-			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
-	} __per_cpu_##name[NR_CPUS]
-#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
-
-/* Per-thread int named smp_processor_id (set to 0 for all slots by smp_init()). */
-DEFINE_PER_THREAD(int, smp_processor_id);
-
-/*
- * per_cpu(): the value stored in slot @thread.
- * __get_cpu_var(): the value in the slot chosen by smp_processor_id().
- * NOTE(review): no smp_processor_id() function or macro is visible in this
- * part of the file -- confirm where it comes from.
- */
-#define per_cpu(name, thread) __per_cpu_##name[thread].v
-#define __get_cpu_var(name) per_cpu(name, smp_processor_id())
-
-/* Store @v into every one of the NR_CPUS slots of @name. */
-#define init_per_cpu(name, v) \
-	do { \
-		int __i_p_c_i; \
-		for (__i_p_c_i = 0; __i_p_c_i < NR_CPUS; __i_p_c_i++) \
-			per_cpu(name, __i_p_c_i) = v; \
-	} while (0)
-
-/*
- * CPU state checking (crowbarred).
- */
-
-/* Fixed answers: no CPU is ever idle, softirq context is always reported,
- * and the hardirq nesting count is always 0. */
-#define idle_cpu(cpu) 0
-#define in_softirq() 1
-#define hardirq_count() 0
-/*
- * Bit-field layout constants.  The *_SHIFT macros mention *_BITS macros
- * defined a few lines further down; that works because macro bodies are
- * only expanded at the point of use.
- */
-#define PREEMPT_SHIFT   0
-#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define PREEMPT_BITS    8
-#define SOFTIRQ_BITS    8
-
-/*
- * CPU hotplug.
- */
-
-/*
- * One element of a singly linked chain of notifier callbacks.
- * NOTE(review): nothing in the visible part of this file walks the chain
- * or interprets @priority -- presumably callers order blocks by it.
- */
-struct notifier_block {
-	/* Callback; receives this block, an unsigned long and an opaque pointer. */
-	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
-	/* Next block in the chain. */
-	struct notifier_block *next;
-	int priority;
-};
-
-#define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
-#define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
-#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
-#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
-#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
-#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-				        * not handling interrupts, soon dead */
-#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
-					* lock is dropped */
-
-/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
- * operation in progress
- */
-#define CPU_TASKS_FROZEN	0x0010
-
-#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)
-#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)
-#define CPU_UP_CANCELED_FROZEN	(CPU_UP_CANCELED | CPU_TASKS_FROZEN)
-#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
-#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
-#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
-
-/* Hibernation and suspend events */
-#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
-#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
-#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
-#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
-#define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
-#define PM_POST_RESTORE		0x0006 /* Restore failed */
-
-#define NOTIFY_DONE		0x0000		/* Don't care */
-#define NOTIFY_OK		0x0001		/* Suits me */
-#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
-#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
-						/* Bad/Veto action */
-/*
- * Clean way to return from the notifier and stop further calls.
- */
-#define NOTIFY_STOP		(NOTIFY_OK|NOTIFY_STOP_MASK)
-
-/*
- * Bug checks.
- *
- * BUG_ON(c) aborts the process when the condition @c is true, i.e. when
- * the "bug" condition holds; it does nothing when @c is false.
- */
-
-#define BUG_ON(c) do { if (c) abort(); } while (0)
-
-/*
- * Initialization -- Must be called before calling any primitives.
- *
- * Sets up the thread-id table mutex, records the calling thread in slot 0
- * of __thread_id_map, marks every other slot empty, zeroes the per-thread
- * smp_processor_id values and creates thread_id_key.  The process exits
- * with -1 if the key cannot be created.
- */
-
-static void smp_init(void)
-{
-	int i;
-	int ret;
-
-	spin_lock_init(&__thread_id_map_mutex);
-	__thread_id_map[0] = pthread_self();
-	for (i = 1; i < NR_THREADS; i++)
-		__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
-	init_per_thread(smp_processor_id, 0);
-	ret = pthread_key_create(&thread_id_key, NULL);
-	if (ret != 0) {
-		/* pthread calls return the error code; they do not set errno */
-		errno = ret;
-		perror("pthread_key_create");
-		exit(-1);
-	}
-}
-
-/* Taken from the Linux kernel source tree, so GPLv2-only!!! */
-
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
-/*
- * Recognizable non-NULL values that list_del() stores in the next/prev
- * pointers of an entry it has unlinked.
- */
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
-
-/*
- * offsetof(): byte offset of MEMBER inside TYPE.
- * container_of(): pointer to the @type that embeds @member, given @ptr to
- * that member.  Relies on GCC statement expressions and typeof.
- */
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
-
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-/* Link pair embedded in each list element and also used as the list head. */
-struct list_head {
-	struct list_head *next, *prev;
-};
-
-/* Initializer making @name an empty list: both pointers refer to itself. */
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-/* Define a list head variable @name that starts out empty. */
-#define LIST_HEAD(name) \
-	struct list_head name = LIST_HEAD_INIT(name)
-
-/**
- * INIT_LIST_HEAD - make a list head empty at run time
- * @list: the list head to initialize
- *
- * Points both @list->next and @list->prev at @list itself, which is the
- * state list_empty() tests for.
- */
-static inline void INIT_LIST_HEAD(struct list_head *list)
-{
-	list->next = list;
-	list->prev = list;
-}
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- *
- * @new:  entry to link in
- * @prev: entry that will precede @new
- * @next: entry that will follow @new
- *
- * When CONFIG_DEBUG_LIST is defined only an extern declaration is emitted
- * and the definition has to come from elsewhere.
- */
-#ifndef CONFIG_DEBUG_LIST
-static inline void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next)
-{
-	next->prev = new;
-	new->next = next;
-	new->prev = prev;
-	prev->next = new;
-}
-#else
-extern void __list_add(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
-#endif
-
-/**
- * list_add - link an entry in directly behind a list head
- * @new: entry being inserted
- * @head: list head that will precede @new
- *
- * @new ends up between @head and the entry that used to follow it, which
- * gives stack (LIFO) ordering when entries are taken from the front.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-	struct list_head *old_first = head->next;
-
-	__list_add(new, head, old_first);
-}
-
-
-/**
- * list_add_tail - link an entry in directly in front of a list head
- * @new: entry being inserted
- * @head: list head that will follow @new
- *
- * @new ends up between the entry that used to precede @head and @head
- * itself, which gives queue (FIFO) ordering when entries are taken from
- * the front.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
-	struct list_head *old_last = head->prev;
-
-	__list_add(new, old_last, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- *
- * @prev: entry that precedes the one being removed
- * @next: entry that follows the one being removed
- *
- * The removed entry itself is not touched.
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-	next->prev = prev;
-	prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty() on entry does not return true after this, the entry is
- * in an undefined state.
- *
- * After unlinking, @entry->next and @entry->prev are overwritten with
- * LIST_POISON1 and LIST_POISON2.  When CONFIG_DEBUG_LIST is defined only
- * an extern declaration is emitted here.
- */
-#ifndef CONFIG_DEBUG_LIST
-static inline void list_del(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	entry->next = LIST_POISON1;
-	entry->prev = LIST_POISON2;
-}
-#else
-extern void list_del(struct list_head *entry);
-#endif
-
-/**
- * list_replace - replace old entry by new one
- * @old : the element to be replaced
- * @new : the new element to insert
- *
- * If @old was empty, it will be overwritten.
- *
- * @new takes over @old's neighbours; @old's own next/prev pointers are
- * left unchanged and still refer to those neighbours.
- */
-static inline void list_replace(struct list_head *old,
-				struct list_head *new)
-{
-	new->next = old->next;
-	new->next->prev = new;
-	new->prev = old->prev;
-	new->prev->next = new;
-}
-
-/**
- * list_replace_init - replace old entry by new one and reinitialize the old one
- * @old: the element to be replaced
- * @new: the new element to insert
- *
- * Same as list_replace(), after which @old is reset to an empty list.
- */
-static inline void list_replace_init(struct list_head *old,
-					struct list_head *new)
-{
-	list_replace(old, new);
-	INIT_LIST_HEAD(old);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- *
- * Unlike list_del(), no poison values are written: @entry is left pointing
- * at itself, so list_empty(@entry) is true afterwards.
- */
-static inline void list_del_init(struct list_head *entry)
-{
-	__list_del(entry->prev, entry->next);
-	INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - unlink an entry and re-insert it right after another head
- * @list: the entry to move
- * @head: the head that will precede our entry
- *
- * The entry is first removed from wherever it currently sits and is then
- * added behind @head with list_add().
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
-	struct list_head *before = list->prev;
-	struct list_head *after = list->next;
-
-	__list_del(before, after);
-	list_add(list, head);
-}
-
-/**
- * list_move_tail - unlink an entry and re-insert it right before another head
- * @list: the entry to move
- * @head: the head that will follow our entry
- *
- * The entry is first removed from wherever it currently sits and is then
- * added in front of @head with list_add_tail().
- */
-static inline void list_move_tail(struct list_head *list,
-				  struct list_head *head)
-{
-	struct list_head *before = list->prev;
-	struct list_head *after = list->next;
-
-	__list_del(before, after);
-	list_add_tail(list, head);
-}
-
-/**
- * list_is_last - report whether an entry is the final one before the head
- * @list: the entry to test
- * @head: the head of the list
- *
- * Returns non-zero when the entry following @list is @head.
- */
-static inline int list_is_last(const struct list_head *list,
-				const struct list_head *head)
-{
-	return head == list->next;
-}
-
-/**
- * list_empty - report whether a list has no entries
- * @head: the list to test.
- *
- * Returns non-zero when @head's next pointer refers back to @head.
- */
-static inline int list_empty(const struct list_head *head)
-{
-	return head == head->next;
-}
-
-/**
- * list_empty_careful - tests whether a list is empty and not being modified
- * @head: the list to test
- *
- * Description:
- * tests whether a list is empty _and_ checks that no other CPU might be
- * in the process of modifying either member (next or prev)
- *
- * NOTE: using list_empty_careful() without synchronization
- * can only be safe if the only activity that can happen
- * to the list entry is list_del_init(). Eg. it cannot be used
- * if another CPU could re-list_add() it.
- *
- * head->next is read once into a local and compared with both @head and
- * head->prev, so the result is true only when both pointers refer to @head.
- */
-static inline int list_empty_careful(const struct list_head *head)
-{
-	struct list_head *next = head->next;
-	return (next == head) && (next == head->prev);
-}
-
-/**
- * list_is_singular - report whether a list holds exactly one entry
- * @head: the list to test.
- *
- * An empty list yields 0; otherwise the result is non-zero when the first
- * and the last entry are the same one.
- */
-static inline int list_is_singular(const struct list_head *head)
-{
-	if (list_empty(head))
-		return 0;
-	return head->next == head->prev;
-}
-
-/*
- * Worker for list_cut_position(): move the entries from the first one on
- * @head up to and including @entry onto @list, and make the entry that
- * followed @entry the new first entry of @head.  Whatever @list pointed to
- * before is overwritten.  No checks are made here.
- */
-static inline void __list_cut_position(struct list_head *list,
-		struct list_head *head, struct list_head *entry)
-{
-	struct list_head *new_first = entry->next;
-	list->next = head->next;
-	list->next->prev = list;
-	list->prev = entry;
-	entry->next = list;
-	head->next = new_first;
-	new_first->prev = head;
-}
-
-/**
- * list_cut_position - split the front part of a list off into another list
- * @list: list that receives the removed entries
- * @head: list that currently holds the entries
- * @entry: last entry to move; may be @head itself, in which case nothing
- *	is moved
- *
- * Everything from the first entry of @head through @entry is transferred
- * to @list.  @entry should be known to be on @head, and @list should be
- * empty or hold nothing worth keeping, because its previous contents are
- * dropped.
- *
- * Nothing happens when @head is empty, or when @head has a single entry
- * and @entry is neither that entry nor @head.  When @entry is @head,
- * @list is simply reset to empty.
- */
-static inline void list_cut_position(struct list_head *list,
-		struct list_head *head, struct list_head *entry)
-{
-	if (list_empty(head))
-		return;
-	if (list_is_singular(head) && head->next != entry && head != entry)
-		return;
-	if (entry == head) {
-		INIT_LIST_HEAD(list);
-		return;
-	}
-	__list_cut_position(list, head, entry);
-}
-
-/*
- * Worker for the list_splice*() family: link the chain running from the
- * first to the last entry of @list in between @prev and @next.  @list
- * itself is not modified, and the caller must make sure it is not empty.
- */
-static inline void __list_splice(const struct list_head *list,
-				 struct list_head *prev,
-				 struct list_head *next)
-{
-	struct list_head *first = list->next;
-	struct list_head *last = list->prev;
-
-	first->prev = prev;
-	prev->next = first;
-
-	last->next = next;
-	next->prev = last;
-}
-
-/**
- * list_splice - insert all entries of one list at the front of another
- * @list: the list whose entries are added.
- * @head: the place to add them in the first list.
- *
- * Intended for stacks.  An empty @list is a no-op.  @list's own head is
- * not reinitialised.
- */
-static inline void list_splice(const struct list_head *list,
-				struct list_head *head)
-{
-	if (list_empty(list))
-		return;
-	__list_splice(list, head, head->next);
-}
-
-/**
- * list_splice_tail - append all entries of one list to the end of another
- * @list: the list whose entries are added.
- * @head: the place to add them in the first list.
- *
- * Intended for queues.  An empty @list is a no-op.  @list's own head is
- * not reinitialised.
- */
-static inline void list_splice_tail(struct list_head *list,
-				struct list_head *head)
-{
-	if (list_empty(list))
-		return;
-	__list_splice(list, head->prev, head);
-}
-
-/**
- * list_splice_init - front-splice one list into another and empty the source
- * @list: the list whose entries are added.
- * @head: the place to add them in the first list.
- *
- * Works like list_splice(); afterwards @list is reinitialised to an empty
- * list.  An empty @list is left untouched.
- */
-static inline void list_splice_init(struct list_head *list,
-				    struct list_head *head)
-{
-	if (list_empty(list))
-		return;
-	__list_splice(list, head, head->next);
-	INIT_LIST_HEAD(list);
-}
-
-/**
- * list_splice_tail_init - tail-splice one list into another and empty the source
- * @list: the list whose entries are added.
- * @head: the place to add them in the first list.
- *
- * Both lists are treated as queues.  Works like list_splice_tail();
- * afterwards @list is reinitialised to an empty list.  An empty @list is
- * left untouched.
- */
-static inline void list_splice_tail_init(struct list_head *list,
-					 struct list_head *head)
-{
-	if (list_empty(list))
-		return;
-	__list_splice(list, head->prev, head);
-	INIT_LIST_HEAD(list);
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr:	the &struct list_head pointer.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- *
- * Thin alias for container_of().
- */
-#define list_entry(ptr, type, member) \
-	container_of(ptr, type, member)
-
-/**
- * list_first_entry - get the first element from a list
- * @ptr:	the list head to take the element from.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
- *
- * Note, that list is expected to be not empty.
- * (On an empty list (ptr)->next is the head itself, so the result would be
- * computed from the head rather than from a real element.)
- */
-#define list_first_entry(ptr, type, member) \
-	list_entry((ptr)->next, type, member)
-
-/**
- * list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * pos->next is handed to prefetch() before every termination test, so a
- * prefetch() definition must be in scope where this macro is used.
- */
-#define list_for_each(pos, head) \
-	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
-        	pos = pos->next)
-
-/**
- * __list_for_each	-	iterate over a list
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * This variant differs from list_for_each() in that it's the
- * simplest possible list iteration code, no prefetching is done.
- * Use this for code that knows the list to be very short (empty
- * or 1 entry) most of the time.
- *
- * The loop advances through pos->next after the body has run, so the body
- * must leave @pos linked; use list_for_each_safe() when removing entries.
- */
-#define __list_for_each(pos, head) \
-	for (pos = (head)->next; pos != (head); pos = pos->next)
-
-/**
- * list_for_each_prev	-	iterate over a list backwards
- * @pos:	the &struct list_head to use as a loop cursor.
- * @head:	the head for your list.
- *
- * Walks the prev pointers starting from the last entry; pos->prev is
- * handed to prefetch() before every termination test.
- */
-#define list_for_each_prev(pos, head) \
-	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
-        	pos = pos->prev)
-
-/**
- * list_for_each_safe - iterate over a list safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- *
- * @n always holds the successor of @pos, fetched before the body runs, and
- * the loop advances with "pos = n"; the body may therefore unlink @pos.
- * No prefetch is issued in this variant.
- */
-#define list_for_each_safe(pos, n, head) \
-	for (pos = (head)->next, n = pos->next; pos != (head); \
-		pos = n, n = pos->next)
-
-/**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
- * @pos:	the &struct list_head to use as a loop cursor.
- * @n:		another &struct list_head to use as temporary storage
- * @head:	the head for your list.
- *
- * @n always holds the predecessor of @pos, fetched before the body runs.
- * pos->prev is also handed to prefetch() before every termination test.
- */
-#define list_for_each_prev_safe(pos, n, head) \
-	for (pos = (head)->prev, n = pos->prev; \
-	     prefetch(pos->prev), pos != (head); \
-	     pos = n, n = pos->prev)
-
-/**
- * list_for_each_entry	-	iterate over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * The loop ends when the embedded list_head of @pos is @head itself, so
- * after a complete walk @pos does not point at a real element.  Uses
- * typeof and prefetch().
- */
-#define list_for_each_entry(pos, head, member)				\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head); 	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_reverse - iterate backwards over list of given type.
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Mirror image of list_for_each_entry(): starts at (head)->prev and
- * follows the prev pointers.
- */
-#define list_for_each_entry_reverse(pos, head, member)			\
-	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head); 	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
- * @pos:	the type * to use as a start point
- * @head:	the head of the list
- * @member:	the name of the list_struct within the struct.
- *
- * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
- *
- * Yields @pos when it is non-NULL; otherwise yields the pseudo-entry
- * computed from @head, so that a following _continue loop starts at the
- * first real element.  Uses the GNU "a ?: b" conditional with omitted
- * middle operand.
- */
-#define list_prepare_entry(pos, head, member) \
-	((pos) ? : list_entry(head, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue - continue iteration over list of given type
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Continue to iterate over list of given type, continuing after
- * the current position.
- *
- * The element @pos points at on entry is not visited; the first one
- * visited is its successor.
- */
-#define list_for_each_entry_continue(pos, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_continue_reverse - iterate backwards from the given point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Start to iterate over list of given type backwards, continuing after
- * the current position.
- *
- * The element @pos points at on entry is not visited; the first one
- * visited is its predecessor.
- */
-#define list_for_each_entry_continue_reverse(pos, head, member)		\
-	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     prefetch(pos->member.prev), &pos->member != (head);	\
-	     pos = list_entry(pos->member.prev, typeof(*pos), member))
-
-/**
- * list_for_each_entry_from - iterate over list of given type from the current point
- * @pos:	the type * to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing from current position.
- */
-#define list_for_each_entry_from(pos, head, member) 			\
-	for (; prefetch(pos->member.next), &pos->member != (head);	\
-	     pos = list_entry(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member),	\
-		n = list_entry(pos->member.next, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_continue
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type, continuing after current point,
- * safe against removal of list entry.
- */
-#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
-	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
-		n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_from
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate over list of given type from current point, safe against
- * removal of list entry.
- */
-#define list_for_each_entry_safe_from(pos, n, head, member) 			\
-	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
-	     &pos->member != (head);						\
-	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-/**
- * list_for_each_entry_safe_reverse
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
- *
- * Iterate backwards over list of given type, safe against removal
- * of list entry.
- */
-#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
-	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
-		n = list_entry(pos->member.prev, typeof(*pos), member);	\
-	     &pos->member != (head); 					\
-	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
-
-/*
- * Double linked lists with a single pointer list head.
- * Mostly useful for hash tables where the two pointer list head is
- * too wasteful.
- * You lose the ability to access the tail in O(1).
- */
-
-struct hlist_head {
-	struct hlist_node *first;
-};
-
-struct hlist_node {
-	struct hlist_node *next, **pprev;
-};
-
-#define HLIST_HEAD_INIT { .first = NULL }
-#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
-#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
-static inline void INIT_HLIST_NODE(struct hlist_node *h)
-{
-	h->next = NULL;
-	h->pprev = NULL;
-}
-
-static inline int hlist_unhashed(const struct hlist_node *h)
-{
-	return !h->pprev;
-}
-
-static inline int hlist_empty(const struct hlist_head *h)
-{
-	return !h->first;
-}
-
-static inline void __hlist_del(struct hlist_node *n)
-{
-	struct hlist_node *next = n->next;
-	struct hlist_node **pprev = n->pprev;
-	*pprev = next;
-	if (next)
-		next->pprev = pprev;
-}
-
-static inline void hlist_del(struct hlist_node *n)
-{
-	__hlist_del(n);
-	n->next = LIST_POISON1;
-	n->pprev = LIST_POISON2;
-}
-
-static inline void hlist_del_init(struct hlist_node *n)
-{
-	if (!hlist_unhashed(n)) {
-		__hlist_del(n);
-		INIT_HLIST_NODE(n);
-	}
-}
-
-static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
-{
-	struct hlist_node *first = h->first;
-	n->next = first;
-	if (first)
-		first->pprev = &n->next;
-	h->first = n;
-	n->pprev = &h->first;
-}
-
-/* next must be != NULL */
-static inline void hlist_add_before(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	n->pprev = next->pprev;
-	n->next = next;
-	next->pprev = &n->next;
-	*(n->pprev) = n;
-}
-
-/*
- * Insert @next immediately after @n.  Despite its name, @next is the node
- * being added and @n is the existing node it is placed behind.  Both
- * pointers are dereferenced unconditionally, so neither may be NULL.
- */
-static inline void hlist_add_after(struct hlist_node *n,
-					struct hlist_node *next)
-{
-	next->next = n->next;
-	n->next = next;
-	next->pprev = &n->next;
-
-	/* Point the former successor of n (if any) back at the new node. */
-	if(next->next)
-		next->next->pprev  = &next->next;
-}
-
-/*
- * Move a list from one list head to another. Fixup the pprev
- * reference of the first entry if it exists.
- */
-static inline void hlist_move_list(struct hlist_head *old,
-				   struct hlist_head *new)
-{
-	new->first = old->first;
-	if (new->first)
-		new->first->pprev = &new->first;
-	old->first = NULL;
-}
-
-#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
-
-#define hlist_for_each(pos, head) \
-	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
-	     pos = pos->next)
-
-#define hlist_for_each_safe(pos, n, head) \
-	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
-	     pos = n)
-
-/**
- * hlist_for_each_entry	- iterate over list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry(tpos, pos, head, member)			 \
-	for (pos = (head)->first;					 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_continue(tpos, pos, member)		 \
-	for (pos = (pos)->next;						 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_from - iterate over a hlist continuing from current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_from(tpos, pos, member)			 \
-	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
-
-/**
- * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @n:		another &struct hlist_node to use as temporary storage
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- */
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
-	for (pos = (head)->first;					 \
-	     pos && ({ n = pos->next; 1; }) && 				 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = n)
-
-#endif
diff --git a/debug_yield.patch b/debug_yield.patch
deleted file mode 100644
index 08c04aa..0000000
--- a/debug_yield.patch
+++ /dev/null
@@ -1,139 +0,0 @@
-diff --git a/urcu.c b/urcu.c
-index 162ce00..018e09b 100644
---- a/urcu.c
-+++ b/urcu.c
-@@ -91,24 +91,17 @@ static void force_mb_all_threads(void)
- 	 */
- 	if (!reader_data)
- 		return;
--	debug_yield_write();
- 	sig_done = 0;
--	debug_yield_write();
- 	smp_mb();	/* write sig_done before sending the signals */
--	debug_yield_write();
--	for (index = reader_data; index < reader_data + num_readers; index++) {
-+	for (index = reader_data; index < reader_data + num_readers; index++)
- 		pthread_kill(index->tid, SIGURCU);
--		debug_yield_write();
--	}
- 	/*
- 	 * Wait for sighandler (and thus mb()) to execute on every thread.
- 	 * BUSY-LOOP.
- 	 */
- 	while (sig_done < num_readers)
- 		barrier();
--	debug_yield_write();
- 	smp_mb();	/* read sig_done before ending the barrier */
--	debug_yield_write();
- }
- #endif
- 
-@@ -135,13 +128,10 @@ void synchronize_rcu(void)
- 	 * where new ptr points to. */
- 	/* Write new ptr before changing the qparity */
- 	force_mb_all_threads();
--	debug_yield_write();
- 
- 	internal_urcu_lock();
--	debug_yield_write();
- 
- 	switch_next_urcu_qparity();	/* 0 -> 1 */
--	debug_yield_write();
- 
- 	/*
- 	 * Must commit qparity update to memory before waiting for parity
-@@ -155,7 +145,6 @@ void synchronize_rcu(void)
- 	 * Wait for previous parity to be empty of readers.
- 	 */
- 	wait_for_quiescent_state();	/* Wait readers in parity 0 */
--	debug_yield_write();
- 
- 	/*
- 	 * Must finish waiting for quiescent state for parity 0 before
-@@ -166,7 +155,6 @@ void synchronize_rcu(void)
- 	smp_mb();
- 
- 	switch_next_urcu_qparity();	/* 1 -> 0 */
--	debug_yield_write();
- 
- 	/*
- 	 * Must commit qparity update to memory before waiting for parity
-@@ -180,17 +168,14 @@ void synchronize_rcu(void)
- 	 * Wait for previous parity to be empty of readers.
- 	 */
- 	wait_for_quiescent_state();	/* Wait readers in parity 1 */
--	debug_yield_write();
- 
- 	internal_urcu_unlock();
--	debug_yield_write();
- 
- 	/* All threads should finish using the data referred to by old ptr
- 	 * before decrementing their urcu_active_readers count */
- 	/* Finish waiting for reader threads before letting the old ptr being
- 	 * freed. */
- 	force_mb_all_threads();
--	debug_yield_write();
- }
- 
- void urcu_add_reader(pthread_t id)
-diff --git a/urcu.h b/urcu.h
-index 92b31df..1b663c7 100644
---- a/urcu.h
-+++ b/urcu.h
-@@ -219,13 +219,11 @@ static inline int rcu_old_gp_ongoing(long *value)
- 
- 	if (value == NULL)
- 		return 0;
--	debug_yield_write();
- 	/*
- 	 * Make sure both tests below are done on the same version of *value
- 	 * to insure consistency.
- 	 */
- 	v = ACCESS_ONCE(*value);
--	debug_yield_write();
- 	return (v & RCU_GP_CTR_NEST_MASK) &&
- 		 ((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT);
- }
-@@ -234,34 +232,27 @@ static inline void rcu_read_lock(void)
- {
- 	long tmp;
- 
--	debug_yield_read();
- 	tmp = urcu_active_readers;
--	debug_yield_read();
- 	/* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */
- 	if (likely(!(tmp & RCU_GP_CTR_NEST_MASK)))
- 		urcu_active_readers = urcu_gp_ctr;
- 	else
- 		urcu_active_readers = tmp + RCU_GP_COUNT;
--	debug_yield_read();
- 	/*
- 	 * Increment active readers count before accessing the pointer.
- 	 * See force_mb_all_threads().
- 	 */
- 	read_barrier();
--	debug_yield_read();
- }
- 
- static inline void rcu_read_unlock(void)
- {
--	debug_yield_read();
- 	read_barrier();
--	debug_yield_read();
- 	/*
- 	 * Finish using rcu before decrementing the pointer.
- 	 * See force_mb_all_threads().
- 	 */
- 	urcu_active_readers -= RCU_GP_COUNT;
--	debug_yield_read();
- }
- 
- /**
-@@ -302,7 +293,6 @@ extern void synchronize_rcu(void);
- #define urcu_publish_content(p, v) \
- 	({ \
- 		void *oldptr; \
--		debug_yield_write(); \
- 		oldptr = rcu_xchg_pointer(p, v); \
- 		synchronize_rcu(); \
- 		oldptr; \
diff --git a/rcutorture.h b/rcutorture.h
deleted file mode 100644
index 00b0b31..0000000
--- a/rcutorture.h
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * rcutorture.h: simple user-level performance/stress test of RCU.
- *
- * Usage:
- * 	./rcu <nreaders> rperf [ <cpustride> ]
- * 		Run a read-side performance test with the specified
- * 		number of readers spaced by <cpustride>.
- * 		Thus "./rcu 16 rperf 2" would run 16 readers on even-numbered
- * 		CPUs from 0 to 30.
- * 	./rcu <nupdaters> uperf [ <cpustride> ]
- * 		Run an update-side performance test with the specified
- * 		number of updaters and specified CPU spacing.
- * 	./rcu <nreaders> perf [ <cpustride> ]
- * 		Run a combined read/update performance test with the specified
- * 		number of readers and one updater and specified CPU spacing.
- * 		The readers run on the low-numbered CPUs and the updater
- * 		of the highest-numbered CPU.
- *
- * The above tests produce output as follows:
- *
- * n_reads: 46008000  n_updates: 146026  nreaders: 2  nupdaters: 1 duration: 1
- * ns/read: 43.4707  ns/update: 6848.1
- *
- * The first line lists the total number of RCU reads and updates executed
- * during the test, the number of reader threads, the number of updater
- * threads, and the duration of the test in seconds.  The second line
- * lists the average duration of each type of operation in nanoseconds,
- * or "nan" if the corresponding type of operation was not performed.
- *
- * 	./rcu <nreaders> stress
- * 		Run a stress test with the specified number of readers and
- * 		one updater.  None of the threads are affinitied to any
- * 		particular CPU.
- *
- * This test produces output as follows:
- *
- * n_reads: 114633217  n_updates: 3903415  n_mberror: 0
- * rcu_stress_count: 114618391 14826 0 0 0 0 0 0 0 0 0
- *
- * The first line lists the number of RCU read and update operations
- * executed, followed by the number of memory-ordering violations
- * (which will be zero in a correct RCU implementation).  The second
- * line lists the number of readers observing progressively more stale
- * data.  A correct RCU implementation will have all but the first two
- * numbers equal to zero.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (c) 2008 Paul E. McKenney, IBM Corporation.
- */
-
-/*
- * Test variables.
- */
-
-DEFINE_PER_THREAD(long long, n_reads_pt);
-DEFINE_PER_THREAD(long long, n_updates_pt);
-
-long long n_reads = 0LL;
-long n_updates = 0L;
-atomic_t nthreadsrunning;
-char argsbuf[64];
-
-#define GOFLAG_INIT 0
-#define GOFLAG_RUN  1
-#define GOFLAG_STOP 2
-
-int goflag __attribute__((__aligned__(CACHE_LINE_SIZE))) = GOFLAG_INIT;
-
-#define RCU_READ_RUN 1000
-
-//MD
-#define RCU_READ_NESTABLE
-
-#ifdef RCU_READ_NESTABLE
-#define rcu_read_lock_nest() rcu_read_lock()
-#define rcu_read_unlock_nest() rcu_read_unlock()
-#else /* #ifdef RCU_READ_NESTABLE */
-#define rcu_read_lock_nest()
-#define rcu_read_unlock_nest()
-#endif /* #else #ifdef RCU_READ_NESTABLE */
-
-/* No-op fallback used when mark_rcu_quiescent_state() is not already defined. */
-#ifndef mark_rcu_quiescent_state
-#define mark_rcu_quiescent_state() do ; while (0)
-#endif /* #ifndef mark_rcu_quiescent_state */
-
-#ifndef put_thread_offline
-#define put_thread_offline()		do ; while (0)
-#define put_thread_online()		do ; while (0)
-#define put_thread_online_delay()	do ; while (0)
-#else /* #ifndef put_thread_offline */
-#define put_thread_online_delay()	synchronize_rcu()
-#endif /* #else #ifndef put_thread_offline */
-
-/*
- * Performance test.
- */
-
-void *rcu_read_perf_test(void *arg)
-{
-	int i;
-	int me = (long)arg;
-	long long n_reads_local = 0;
-
-	rcu_register_thread();
-	run_on(me);
-	atomic_inc(&nthreadsrunning);
-	while (goflag == GOFLAG_INIT)
-		poll(NULL, 0, 1);
-	mark_rcu_quiescent_state();
-	while (goflag == GOFLAG_RUN) {
-		for (i = 0; i < RCU_READ_RUN; i++) {
-			rcu_read_lock();
-			/* rcu_read_lock_nest(); */
-			/* rcu_read_unlock_nest(); */
-			rcu_read_unlock();
-		}
-		n_reads_local += RCU_READ_RUN;
-		mark_rcu_quiescent_state();
-	}
-	__get_thread_var(n_reads_pt) += n_reads_local;
-	put_thread_offline();
-	rcu_unregister_thread();
-
-	return (NULL);
-}
-
-void *rcu_update_perf_test(void *arg)
-{
-	long long n_updates_local = 0;
-
-	atomic_inc(&nthreadsrunning);
-	while (goflag == GOFLAG_INIT)
-		poll(NULL, 0, 1);
-	while (goflag == GOFLAG_RUN) {
-		synchronize_rcu();
-		n_updates_local++;
-	}
-	__get_thread_var(n_updates_pt) += n_updates_local;
-	return NULL;
-}
-
-void perftestinit(void)
-{
-	init_per_thread(n_reads_pt, 0LL);
-	init_per_thread(n_updates_pt, 0LL);
-	atomic_set(&nthreadsrunning, 0);
-}
-
-void perftestrun(int nthreads, int nreaders, int nupdaters)
-{
-	int t;
-	int duration = 1;
-
-	smp_mb();
-	while (atomic_read(&nthreadsrunning) < nthreads)
-		poll(NULL, 0, 1);
-	goflag = GOFLAG_RUN;
-	smp_mb();
-	sleep(duration);
-	smp_mb();
-	goflag = GOFLAG_STOP;
-	smp_mb();
-	wait_all_threads();
-	for_each_thread(t) {
-		n_reads += per_thread(n_reads_pt, t);
-		n_updates += per_thread(n_updates_pt, t);
-	}
-	printf("n_reads: %lld  n_updates: %ld  nreaders: %d  nupdaters: %d duration: %d\n",
-	       n_reads, n_updates, nreaders, nupdaters, duration);
-	printf("ns/read: %g  ns/update: %g\n",
-	       ((duration * 1000*1000*1000.*(double)nreaders) /
-	        (double)n_reads),
-	       ((duration * 1000*1000*1000.*(double)nupdaters) /
-	        (double)n_updates));
-	exit(0);
-}
-
-void perftest(int nreaders, int cpustride)
-{
-	int i;
-	long arg;
-
-	perftestinit();
-	for (i = 0; i < nreaders; i++) {
-		arg = (long)(i * cpustride);
-		create_thread(rcu_read_perf_test, (void *)arg);
-	}
-	arg = (long)(i * cpustride);
-	create_thread(rcu_update_perf_test, (void *)arg);
-	perftestrun(i + 1, nreaders, 1);
-}
-
-/*
- * Read-side-only performance test.
- *
- * Starts nreaders threads running rcu_read_perf_test(), passing
- * i * cpustride as the argument of the i-th thread, then hands over to
- * perftestrun(), which prints the results and exits the process.
- */
-void rperftest(int nreaders, int cpustride)
-{
-	int i;
-	long arg;
-
-	/* perftestinit() zeroes both n_reads_pt and n_updates_pt. */
-	perftestinit();
-	for (i = 0; i < nreaders; i++) {
-		arg = (long)(i * cpustride);
-		create_thread(rcu_read_perf_test, (void *)arg);
-	}
-	perftestrun(i, nreaders, 0);
-}
-
-/*
- * Update-side-only performance test.
- *
- * Starts nupdaters threads running rcu_update_perf_test(), passing
- * i * cpustride as the argument of the i-th thread (an argument that
- * rcu_update_perf_test() does not currently read), then hands over to
- * perftestrun(), which prints the results and exits the process.
- */
-void uperftest(int nupdaters, int cpustride)
-{
-	int i;
-	long arg;
-
-	/* perftestinit() zeroes both n_reads_pt and n_updates_pt. */
-	perftestinit();
-	for (i = 0; i < nupdaters; i++) {
-		arg = (long)(i * cpustride);
-		create_thread(rcu_update_perf_test, (void *)arg);
-	}
-	perftestrun(i, 0, nupdaters);
-}
-
-/*
- * Stress test.
- */
-
-#define RCU_STRESS_PIPE_LEN 10
-
-struct rcu_stress {
-	int pipe_count;
-	int mbtest;
-};
-
-struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } };
-struct rcu_stress *rcu_stress_current;
-int rcu_stress_idx = 0;
-
-int n_mberror = 0;
-DEFINE_PER_THREAD(long long [RCU_STRESS_PIPE_LEN + 1], rcu_stress_count);
-
-int garbage = 0;
-
-void *rcu_read_stress_test(void *arg)
-{
-	int i;
-	int itercnt = 0;
-	struct rcu_stress *p;
-	int pc;
-
-	rcu_register_thread();
-	while (goflag == GOFLAG_INIT)
-		poll(NULL, 0, 1);
-	mark_rcu_quiescent_state();
-	while (goflag == GOFLAG_RUN) {
-		rcu_read_lock();
-		p = rcu_dereference(rcu_stress_current);
-		if (p->mbtest == 0)
-			n_mberror++;
-		rcu_read_lock_nest();
-		for (i = 0; i < 100; i++)
-			garbage++;
-		rcu_read_unlock_nest();
-		pc = p->pipe_count;
-		rcu_read_unlock();
-		if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0))
-			pc = RCU_STRESS_PIPE_LEN;
-		__get_thread_var(rcu_stress_count)[pc]++;
-		__get_thread_var(n_reads_pt)++;
-		mark_rcu_quiescent_state();
-		if ((++itercnt % 0x1000) == 0) {
-			put_thread_offline();
-			put_thread_online_delay();
-			put_thread_online();
-		}
-	}
-	put_thread_offline();
-	rcu_unregister_thread();
-
-	return (NULL);
-}
-
-/*
- * Updater thread for the stress test (arg is unused).
- *
- * While goflag is GOFLAG_RUN, each pass takes the slot following
- * rcu_stress_idx in rcu_stress_array[] (wrapping to 0), resets it,
- * publishes it through rcu_stress_current, ages every other slot by
- * incrementing its pipe_count, and then calls synchronize_rcu().
- * n_updates counts the completed passes.
- */
-void *rcu_update_stress_test(void *arg)
-{
-	int i;
-	struct rcu_stress *p;
-
-	/* Wait until stresstest() switches goflag away from GOFLAG_INIT. */
-	while (goflag == GOFLAG_INIT)
-		poll(NULL, 0, 1);
-	while (goflag == GOFLAG_RUN) {
-		i = rcu_stress_idx + 1;
-		if (i >= RCU_STRESS_PIPE_LEN)
-			i = 0;
-		p = &rcu_stress_array[i];
-		/*
-		 * mbtest is cleared first and set again only after pipe_count
-		 * has been reset; rcu_read_stress_test() counts an n_mberror
-		 * whenever it sees mbtest == 0 through rcu_stress_current.
-		 */
-		p->mbtest = 0;
-		smp_mb();
-		p->pipe_count = 0;
-		p->mbtest = 1;
-		rcu_assign_pointer(rcu_stress_current, p);
-		rcu_stress_idx = i;
-		/* Every slot other than the one just published gets older. */
-		for (i = 0; i < RCU_STRESS_PIPE_LEN; i++)
-			if (i != rcu_stress_idx)
-				rcu_stress_array[i].pipe_count++;
-		synchronize_rcu();
-		n_updates++;
-	}
-	return NULL;
-}
-
-void *rcu_fake_update_stress_test(void *arg)
-{
-	while (goflag == GOFLAG_INIT)
-		poll(NULL, 0, 1);
-	while (goflag == GOFLAG_RUN) {
-		synchronize_rcu();
-		poll(NULL, 0, 1);
-	}
-	return NULL;
-}
-
-void stresstest(int nreaders)
-{
-	int i;
-	int t;
-	long long *p;
-	long long sum;
-
-	init_per_thread(n_reads_pt, 0LL);
-	for_each_thread(t) {
-		p = &per_thread(rcu_stress_count,t)[0];
-		for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++)
-			p[i] = 0LL;
-	}
-	rcu_stress_current = &rcu_stress_array[0];
-	rcu_stress_current->pipe_count = 0;
-	rcu_stress_current->mbtest = 1;
-	for (i = 0; i < nreaders; i++)
-		create_thread(rcu_read_stress_test, NULL);
-	create_thread(rcu_update_stress_test, NULL);
-	for (i = 0; i < 5; i++)
-		create_thread(rcu_fake_update_stress_test, NULL);
-	smp_mb();
-	goflag = GOFLAG_RUN;
-	smp_mb();
-	sleep(10);
-	smp_mb();
-	goflag = GOFLAG_STOP;
-	smp_mb();
-	wait_all_threads();
-	for_each_thread(t)
-		n_reads += per_thread(n_reads_pt, t);
-	printf("n_reads: %lld  n_updates: %ld  n_mberror: %d\n",
-	       n_reads, n_updates, n_mberror);
-	printf("rcu_stress_count:");
-	for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) {
-		sum = 0LL;
-		for_each_thread(t) {
-			sum += per_thread(rcu_stress_count, t)[i];
-		}
-		printf(" %lld", sum);
-	}
-	printf("\n");
-	exit(0);
-}
-
-/*
- * Mainprogram.
- */
-
-/*
- * Print the command-line synopsis to stderr and terminate the process
- * with exit status -1.  argc is unused.
- *
- * The synopsis lists every mode main() dispatches on (perf, rperf, uperf,
- * stress) and the optional cpustride argument it reads from argv[3].
- */
-void usage(int argc, char *argv[])
-{
-	fprintf(stderr,
-		"Usage: %s [nreaders [ perf | rperf | uperf | stress ] [ cpustride ] ]\n",
-		argv[0]);
-	exit(-1);
-}
-
-/*
- * Parse "[nreaders [ mode [ cpustride ] ] ]" and run the selected test.
- *
- * With no arguments, or with only nreaders, the combined perf test is run
- * with a cpustride of 1.  The control flow below depends on the test
- * drivers never returning: perftestrun() and stresstest() end with
- * exit(0), and usage() ends with exit(-1).
- */
-int main(int argc, char *argv[])
-{
-	int nreaders = 1;
-	int cpustride = 1;
-
-	smp_init();
-	//rcu_init();
-
-#ifdef DEBUG_YIELD
-	yield_active |= YIELD_READ;
-	yield_active |= YIELD_WRITE;
-#endif
-
-	if (argc > 1) {
-		nreaders = strtoul(argv[1], NULL, 0);
-		/*
-		 * Only nreaders was given.  This call must not return:
-		 * argv[2] is NULL here and is passed to strcmp() below.
-		 */
-		if (argc == 2)
-			perftest(nreaders, cpustride);
-		if (argc > 3)
-			cpustride = strtoul(argv[3], NULL, 0);
-		if (strcmp(argv[2], "perf") == 0)
-			perftest(nreaders, cpustride);
-		else if (strcmp(argv[2], "rperf") == 0)
-			rperftest(nreaders, cpustride);
-		else if (strcmp(argv[2], "uperf") == 0)
-			uperftest(nreaders, cpustride);
-		else if (strcmp(argv[2], "stress") == 0)
-			stresstest(nreaders);
-		/* Reached only when argv[2] names no known mode. */
-		usage(argc, argv);
-	}
-	perftest(nreaders, cpustride);
-	return 0;
-}
diff --git a/runall.sh b/runall.sh
deleted file mode 100755
index e2b47dc..0000000
--- a/runall.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/sh
-
-#run all tests
-
-#set to number of active CPUS
-NUM_CPUS=8
-
-#extra options, e.g. for setting affinity on even CPUs :
-#EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
-
-#ppc64 striding, use with NUM_CPUS=8
-
-#stride 1
-#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
-#stride 2
-#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
-#stride 4
-#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
-#stride 8
-#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
-
-#Vary update fraction
-#x: vary update fraction from 0 to 0.0001
-  #fix number of readers and reader C.S. length, vary delay between updates
-#y: ops/s
-
-rm -f runall.log
-rm -fr runall.detail.log
-
-
-echo Executing batch RCU test
-
-DURATION=10
-BATCH_ARRAY="1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536
-	     131072 262144"
-NR_WRITERS=$((${NUM_CPUS} / 2))
-
-rm -f batch-rcu.log
-
-NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
-for BATCH_SIZE in ${BATCH_ARRAY}; do
-	echo "./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log" >> runall.log
-	./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log
-done
-
-#setting gc each 32768. ** UPDATE FOR YOUR ARCHITECTURE BASED ON TEST ABOVE **
-#"+=" is a bash extension and this script runs under /bin/sh, so append
-#with a plain POSIX assignment instead.
-EXTRA_OPTS="${EXTRA_OPTS} -b 32768"
-
-echo Executing update fraction test
-
-DURATION=10
-WDELAY_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768
-              65536 131072 262144 524288 1048576 2097152 4194304 8388608
-              16777216 33554432 67108864 134217728"
-NR_WRITERS=$((${NUM_CPUS} / 2))
-
-rm -f update-fraction.log
-
-NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
-for WDELAY in ${WDELAY_ARRAY}; do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log
-done
-
-#Test scalability :
-# x: vary number of readers from 0 to num cpus
-# y: ops/s
-# 0 writer.
-
-echo Executing scalability test
-
-NR_WRITERS=0
-DURATION=10
-
-rm -f scalability.log
-
-for NR_READERS in $(seq 1 ${NUM_CPUS}); do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log
-done
-
-
-# x: Vary reader C.S. length from 0 to 100 us
-# y: ops/s
-# 8 readers
-# 0 writers
-
-echo Executing reader C.S. length test
-
-NR_READERS=${NUM_CPUS}
-NR_WRITERS=0
-DURATION=10
-#in loops.
-READERCSLEN_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152"
-
-rm -f readercslen.log
-
-for READERCSLEN in ${READERCSLEN_ARRAY}; do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log
-done
diff --git a/runpaul-phase1.sh b/runpaul-phase1.sh
deleted file mode 100755
index d2c8649..0000000
--- a/runpaul-phase1.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/sh
-
-#run all tests
-
-#set to number of active CPUS
-NUM_CPUS=64
-
-#extra options, e.g. for setting affinity on even CPUs :
-EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
-
-#ppc64 striding, use with NUM_CPUS=8
-
-#stride 1
-#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
-#stride 2
-#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
-#stride 4
-#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
-#stride 8
-#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
-
-#Vary update fraction
-#x: vary update fraction from 0 to 0.0001
-  #fix number of readers and reader C.S. length, vary delay between updates
-#y: ops/s
-
-rm -f runall.log
-rm -fr runall.detail.log
-
-
-echo Executing batch RCU test
-
-DURATION=10
-BATCH_ARRAY="1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536
-	     131072 262144"
-NR_WRITERS=$((${NUM_CPUS} / 2))
-
-rm -f batch-rcu.log
-
-NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
-for BATCH_SIZE in ${BATCH_ARRAY}; do
-	echo "./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log" >> runall.log
-	./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log
-done
diff --git a/runpaul-phase2.sh b/runpaul-phase2.sh
deleted file mode 100755
index 0f6bd91..0000000
--- a/runpaul-phase2.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/sh
-
-#run all tests
-
-#set to number of active CPUS
-NUM_CPUS=64
-
-#extra options, e.g. for setting affinity on even CPUs :
-EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
-
-#ppc64 striding, use with NUM_CPUS=8
-
-#stride 1
-#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
-#stride 2
-#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
-#stride 4
-#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
-#stride 8
-#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
-
-#Vary update fraction
-#x: vary update fraction from 0 to 0.0001
-  #fix number of readers and reader C.S. length, vary delay between updates
-#y: ops/s
-
-rm -f runall.log
-rm -fr runall.detail.log
-
-#setting gc each 32768. ** UPDATE FOR YOUR ARCHITECTURE BASED ON PHASE 1 RESULT **
-#"+=" is a bash extension and this script runs under /bin/sh, so append
-#with a plain POSIX assignment instead.
-EXTRA_OPTS="${EXTRA_OPTS} -b 32768"
-
-echo Executing update fraction test
-
-DURATION=10
-WDELAY_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768
-              65536 131072 262144 524288 1048576 2097152 4194304 8388608
-              16777216 33554432 67108864 134217728"
-NR_WRITERS=$((${NUM_CPUS} / 2))
-
-rm -f update-fraction.log
-
-NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
-for WDELAY in ${WDELAY_ARRAY}; do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log
-done
diff --git a/runpaul-phase3.sh b/runpaul-phase3.sh
deleted file mode 100755
index 1956062..0000000
--- a/runpaul-phase3.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/sh
-
-#run all tests
-
-#set to number of active CPUS
-NUM_CPUS=64
-
-#extra options, e.g. for setting affinity on even CPUs :
-EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
-
-#ppc64 striding, use with NUM_CPUS=8
-
-#stride 1
-#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
-#stride 2
-#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
-#stride 4
-#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
-#stride 8
-#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
-
-#Vary update fraction
-#x: vary update fraction from 0 to 0.0001
-  #fix number of readers and reader C.S. length, vary delay between updates
-#y: ops/s
-
-rm -f runall.log
-rm -fr runall.detail.log
-
-#setting gc each 32768. ** UPDATE FOR YOUR ARCHITECTURE BASED ON PHASE 1 RESULT **
-#"+=" is a bash extension and this script runs under /bin/sh, so append
-#with a plain POSIX assignment instead.
-EXTRA_OPTS="${EXTRA_OPTS} -b 32768"
-
-#Test scalability :
-# x: vary number of readers from 0 to num cpus
-# y: ops/s
-# 0 writer.
-
-echo Executing scalability test
-
-NR_WRITERS=0
-DURATION=10
-
-rm -f scalability.log
-
-for NR_READERS in $(seq 1 ${NUM_CPUS}); do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log
-done
-
-
diff --git a/runpaul-phase4.sh b/runpaul-phase4.sh
deleted file mode 100755
index ede402c..0000000
--- a/runpaul-phase4.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/sh
-
-#run all tests
-
-#set to number of active CPUS
-export NUM_CPUS=8
-
-#extra options, e.g. for setting affinity on even CPUs :
-#EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
-
-#ppc64 striding, use with NUM_CPUS=8
-
-rm -f *.log
-
-#For each stride below: the command substitution is quoted so that shells
-#which field-split the arguments of "export" keep every "-a N" option in
-#EXTRA_OPTS, the script arguments are forwarded unchanged with "$@", and
-#"mkdir -p" lets a rerun reuse an existing result directory.
-
-#stride 1
-export EXTRA_OPTS="$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)"
-sh subphase4.sh "$@"
-mkdir -p ppc64-8cores-stride1
-mv *.log ppc64-8cores-stride1/
-
-
-#stride 2
-export EXTRA_OPTS="$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)"
-sh subphase4.sh "$@"
-mkdir -p ppc64-8cores-stride2
-mv *.log ppc64-8cores-stride2/
-
-
-#stride 4
-export EXTRA_OPTS="$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)"
-sh subphase4.sh "$@"
-mkdir -p ppc64-8cores-stride4
-mv *.log ppc64-8cores-stride4/
-
-
-#stride 8
-export EXTRA_OPTS="$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)"
-sh subphase4.sh "$@"
-mkdir -p ppc64-8cores-stride8
-mv *.log ppc64-8cores-stride8/
diff --git a/runpaul-phase5.sh b/runpaul-phase5.sh
deleted file mode 100644
index bb4bfe7..0000000
--- a/runpaul-phase5.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-# test run after write-size update
-
-sh runpaul-phase1.sh
-mkdir runpaul-phase1
-mv *.log runpaul-phase1/
-
-sh runpaul-phase2.sh
-mkdir runpaul-phase2
-mv *.log runpaul-phase2/
-
-sh runpaul-phase4.sh
diff --git a/runpaul-phase6.sh b/runpaul-phase6.sh
deleted file mode 100644
index 5f65072..0000000
--- a/runpaul-phase6.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-# Runs phases 1 and 2, archiving the logs of each in its own directory.
-
-sh runpaul-phase1.sh
-# -p: do not fail when the directory is left over from a previous run.
-mkdir -p runpaul-phase1
-mv *.log runpaul-phase1/
-
-sh runpaul-phase2.sh
-mkdir -p runpaul-phase2
-mv *.log runpaul-phase2/
diff --git a/runtests-batch.sh b/runtests-batch.sh
deleted file mode 100755
index 67e019b..0000000
--- a/runtests-batch.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-
-# Run each batch test program with the arguments given to this script.
-# The command line and the /usr/bin/time report are appended to
-# runall.detail.log.
-
-#for a in test_urcu_gc test_urcu_gc_mb test_qsbr_gc; do
-for a in test_urcu_gc; do
-	echo "./${a} $*" | tee -a runall.detail.log
-	# "$@" forwards every argument as received; a bare $* would be
-	# re-split on whitespace and glob-expanded.
-	/usr/bin/time --append --output runall.detail.log "./${a}" "$@"
-done
-
diff --git a/runtests.sh b/runtests.sh
deleted file mode 100755
index 981aef1..0000000
--- a/runtests.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-# Run every test program in turn with the arguments given to this script.
-# The command line and the /usr/bin/time report are appended to
-# runall.detail.log.
-
-for a in test_urcu_gc test_urcu_gc_mb test_urcu test_urcu_mb \
-			test_urcu_lgc test_qsbr_lgc test_urcu_lgc_mb \
-			test_qsbr test_qsbr_gc test_rwlock test_perthreadlock \
-			test_mutex; do
-	echo "./${a} $*" | tee -a runall.detail.log
-	# "$@" forwards every argument as received; a bare $* would be
-	# re-split on whitespace and glob-expanded.
-	/usr/bin/time --append --output runall.detail.log "./${a}" "$@"
-done
-
diff --git a/subphase4.sh b/subphase4.sh
deleted file mode 100755
index b69ffbf..0000000
--- a/subphase4.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/sh
-
-#run all tests
-
-#set to number of active CPUS
-#NUM_CPUS=8
-
-#extra options, e.g. for setting affinity on even CPUs :
-#EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
-
-#ppc64 striding, use with NUM_CPUS=8
-
-#stride 1
-#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
-#stride 2
-#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
-#stride 4
-#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
-#stride 8
-#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
-
-#Vary update fraction
-#x: vary update fraction from 0 to 0.0001
-  #fix number of readers and reader C.S. length, vary delay between updates
-#y: ops/s
-
-rm -f runall.log
-rm -fr runall.detail.log
-
-
-echo Executing batch RCU test
-
-DURATION=10
-BATCH_ARRAY="1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536
-	     131072 262144"
-NR_WRITERS=$((${NUM_CPUS} / 2))
-
-rm -f batch-rcu.log
-
-NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
-for BATCH_SIZE in ${BATCH_ARRAY}; do
-	echo "./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log" >> runall.log
-	./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log
-done
-
-#setting gc each 4096. ** UPDATE FOR YOUR ARCHITECTURE BASED ON TEST ABOVE **
-EXTRA_OPTS+="-b 32768"
-
-echo Executing update fraction test
-
-DURATION=10
-WDELAY_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768
-              65536 131072 262144 524288 1048576 2097152 4194304 8388608
-              16777216 33554432 67108864 134217728"
-NR_WRITERS=$((${NUM_CPUS} / 2))
-
-rm -f update-fraction.log
-
-NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
-for WDELAY in ${WDELAY_ARRAY}; do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log
-done
-
-#Test scalability :
-# x: vary number of readers from 0 to num cpus
-# y: ops/s
-# 0 writer.
-
-echo Executing scalability test
-
-NR_WRITERS=0
-DURATION=10
-
-rm -f scalability.log
-
-for NR_READERS in $(seq 1 ${NUM_CPUS}); do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log
-done
-
-
-# x: Vary reader C.S. length from 0 to 100 us
-# y: ops/s
-# 8 readers
-# 0 writers
-
-echo Executing reader C.S. length test
-
-NR_READERS=${NUM_CPUS}
-NR_WRITERS=0
-DURATION=10
-#in loops.
-READERCSLEN_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152"
-
-rm -f readercslen.log
-
-for READERCSLEN in ${READERCSLEN_ARRAY}; do
-	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log" >> runall.log
-	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log
-done
diff --git a/test_looplen.c b/test_looplen.c
deleted file mode 100644
index e07409a..0000000
--- a/test_looplen.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * test_looplen.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#ifndef DYNAMIC_LINK_TEST
-#define _LGPL_SOURCE
-#else
-#define debug_yield_read()
-#endif
-#include "urcu.h"
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-#define LOOPS 1048576
-#define TESTS 10
-
-/*
- * Calibration program: times TESTS runs of a LOOPS-iteration loop_sleep()
- * busy loop with get_cycles() and prints the average number of cycles spent
- * per loop iteration, followed by the raw totals.
- *
- * Command-line arguments are ignored. Always returns 0.
- */
-int main(int argc, char **argv)
-{
-	unsigned long i;
-	cycles_t time1, time2;
-	cycles_t time_tot = 0;
-	double cpl;
-
-	for (i = 0; i < TESTS; i++) {
-		time1 = get_cycles();
-		loop_sleep(LOOPS);
-		time2 = get_cycles();
-		time_tot += time2 - time1;
-	}
-	cpl = ((double)time_tot) / (double)TESTS / (double)LOOPS;
-
-	printf("CALIBRATION : %g cycles per loop\n", cpl);
-	/*
-	 * cycles_t is defined by the per-architecture arch.h; cast so that the
-	 * argument matches %llu whatever its underlying type is.
-	 */
-	printf("time_tot = %llu, LOOPS = %d, TESTS = %d\n",
-	       (unsigned long long)time_tot, LOOPS, TESTS);
-
-	return 0;
-}
diff --git a/test_mutex.c b/test_mutex.c
deleted file mode 100644
index e824980..0000000
--- a/test_mutex.c
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * test_mutex.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#ifndef DYNAMIC_LINK_TEST
-#define _LGPL_SOURCE
-#else
-#define debug_yield_read()
-#endif
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-/*
- * Single mutex taken by every reader and writer thread. Initialized
- * statically, like the other mutexes of this file, instead of relying on
- * an all-zero object being a valid unlocked mutex.
- */
-static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static volatile struct test_array test_array = { 8 };
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * Pin the calling thread to the next CPU of cpu_affinities[].
- *
- * Does nothing when use_affinity is not set. affinity_mutex serializes the
- * read-and-increment of next_aff between threads. Exits the process if the
- * mutex cannot be locked or unlocked.
- */
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	/*
-	 * pthread_mutex_*() return the error number instead of setting errno,
-	 * so report it with strerror(ret); perror() would print whatever
-	 * unrelated value errno happens to hold.
-	 */
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		fprintf(stderr, "Error in pthread mutex lock: %s\n",
-			strerror(ret));
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
-			strerror(ret));
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	/* Best effort: the result of sched_setaffinity() is not checked. */
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
-static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-void *thr_reader(void *data)
-{
-	unsigned long tidx = (unsigned long)data;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-
-	for (;;) {
-		pthread_mutex_lock(&lock);
-		assert(test_array.a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		pthread_mutex_unlock(&lock);
-		nr_reads++;
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	tot_nr_reads[tidx] = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *data)
-{
-	unsigned long wtidx = (unsigned long)data;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		pthread_mutex_lock(&lock);
-		test_array.a = 0;
-		test_array.a = 8;
-		pthread_mutex_unlock(&lock);
-		nr_writes++;
-		if (unlikely(!test_duration_write()))
-			break;
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	tot_nr_writes[wtidx] = nr_writes;
-	return ((void*)2);
-}
-
-/*
- * Print the command-line synopsis on stdout.
- *
- * argc is unused; argv[0] is printed as the program name.
- */
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	/*
-	 * The -d value is handed to loop_sleep() and reported by main() as
-	 * "loops", so it is a busy-loop count, not a time in microseconds.
-	 */
-	printf(" [-d delay] (writer period (in loops))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long *count_reader, *count_writer;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	smp_mb();
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	count_reader = malloc(sizeof(*count_reader) * nr_readers);
-	count_writer = malloc(sizeof(*count_writer) * nr_writers);
-	tot_nr_reads = malloc(sizeof(*tot_nr_reads) * nr_readers);
-	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
-
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += tot_nr_reads[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += tot_nr_writes[i];
-	}
-
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes);
-
-	free(tid_reader);
-	free(tid_writer);
-	free(count_reader);
-	free(count_writer);
-	free(tot_nr_reads);
-	free(tot_nr_writes);
-	return 0;
-}
diff --git a/test_perthreadlock.c b/test_perthreadlock.c
deleted file mode 100644
index 31b7721..0000000
--- a/test_perthreadlock.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * test_perthreadlock.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#ifndef DYNAMIC_LINK_TEST
-#define _LGPL_SOURCE
-#else
-#define debug_yield_read()
-#endif
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-struct per_thread_lock {
-	pthread_mutex_t lock;
-} __attribute__((aligned(CACHE_LINE_SIZE)));	/* cache-line aligned */
-
-static struct per_thread_lock *per_thread_lock;
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static volatile struct test_array test_array = { 8 };
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
-static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-void *thr_reader(void *data)
-{
-	unsigned long tidx = (unsigned long)data;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-
-	for (;;) {
-		pthread_mutex_lock(&per_thread_lock[tidx].lock);
-		assert(test_array.a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		pthread_mutex_unlock(&per_thread_lock[tidx].lock);
-		nr_reads++;
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	tot_nr_reads[tidx] = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-/*
- * Writer thread body.
- *
- * data carries the writer index (cast to unsigned long), used to store this
- * thread's write count in tot_nr_writes[]. Returns (void *)2.
- */
-void *thr_writer(void *data)
-{
-	unsigned long wtidx = (unsigned long)data;
-	long tidx;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	/* Busy-wait until main() sets test_go. */
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		/* Take every reader's lock, in ascending index order. */
-		for (tidx = 0; tidx < nr_readers; tidx++) {
-			pthread_mutex_lock(&per_thread_lock[tidx].lock);
-		}
-		/*
-		 * Readers assert test_array.a == 8 while holding their own
-		 * lock, so the transient 0 must never be visible to them.
-		 */
-		test_array.a = 0;
-		test_array.a = 8;
-		/* Release the locks in the reverse (descending) order. */
-		for (tidx = (long)nr_readers - 1; tidx >= 0; tidx--) {
-			pthread_mutex_unlock(&per_thread_lock[tidx].lock);
-		}
-		nr_writes++;
-		/* Leave once main() has set test_stop. */
-		if (unlikely(!test_duration_write()))
-			break;
-		/* Optional pause between updates, in busy-loop iterations. */
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	/* Publish the thread-local count; main() reads it after joining. */
-	tot_nr_writes[wtidx] = nr_writes;
-	return ((void*)2);
-}
-
-/*
- * Print the command-line synopsis on stdout.
- *
- * argc is unused; argv[0] is printed as the program name.
- */
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	/*
-	 * The -d value is handed to loop_sleep() and reported by main() as
-	 * "loops", so it is a busy-loop count, not a time in microseconds.
-	 */
-	printf(" [-d delay] (writer period (in loops))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-/*
- * Per-thread-lock benchmark entry point.
- *
- * Parses "nr_readers nr_writers duration" followed by the optional flags
- * listed by show_usage(), starts the reader and writer threads, lets them
- * run for "duration" seconds, joins them and prints a SUMMARY line with the
- * read/write totals.
- *
- * Returns 0 on success and -1 on a usage error. Exits with status 1 when a
- * mutex cannot be initialized or a thread cannot be created or joined.
- */
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	smp_mb();
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			/* cpu_affinities[] only holds NR_CPUS entries. */
-			if (next_aff >= NR_CPUS) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	tot_nr_reads = malloc(sizeof(*tot_nr_reads) * nr_readers);
-	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
-	per_thread_lock = malloc(sizeof(*per_thread_lock) * nr_readers);
-
-	/*
-	 * malloc() returns uninitialized memory: every per-reader mutex has to
-	 * be initialized before the first thread tries to lock it.
-	 */
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_mutex_init(&per_thread_lock[i].lock, NULL);
-		if (err != 0)
-			exit(1);
-	}
-
-	/* Restart from the first -a entry: set_affinity() consumes them. */
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	/* Release the threads, which all spin on test_go. */
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	/* Each thread stores its own count just before returning. */
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += tot_nr_reads[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += tot_nr_writes[i];
-	}
-
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes);
-
-	/* All threads are joined: nobody holds the mutexes any more. */
-	for (i = 0; i < nr_readers; i++)
-		pthread_mutex_destroy(&per_thread_lock[i].lock);
-
-	free(tid_reader);
-	free(tid_writer);
-	free(tot_nr_reads);
-	free(tot_nr_writes);
-	free(per_thread_lock);
-	return 0;
-}
diff --git a/test_perthreadlock_timing.c b/test_perthreadlock_timing.c
deleted file mode 100644
index 3e8268b..0000000
--- a/test_perthreadlock_timing.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * test_perthreadlock_timing.c
- *
- * Per thread locks - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <pthread.h>
-#include <arch.h>
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-static struct test_array test_array = { 8 };
-
-struct per_thread_lock {
-	pthread_mutex_t lock;
-} __attribute__((aligned(CACHE_LINE_SIZE)));	/* cache-line aligned */
-
-static struct per_thread_lock *per_thread_lock;
-
-#define OUTER_READ_LOOP	200U
-#define INNER_READ_LOOP	100000U
-#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
-
-#define OUTER_WRITE_LOOP 10U
-#define INNER_WRITE_LOOP 200U
-#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
-
-static int num_read;
-static int num_write;
-
-#define NR_READ num_read
-#define NR_WRITE num_write
-
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
-
-void *thr_reader(void *arg)
-{
-	int i, j;
-	cycles_t time1, time2;
-	long tidx = (long)arg;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	time1 = get_cycles();
-	for (i = 0; i < OUTER_READ_LOOP; i++) {
-		for (j = 0; j < INNER_READ_LOOP; j++) {
-			pthread_mutex_lock(&per_thread_lock[tidx].lock);
-			assert(test_array.a == 8);
-			pthread_mutex_unlock(&per_thread_lock[tidx].lock);
-		}
-	}
-	time2 = get_cycles();
-
-	reader_time[tidx] = time2 - time1;
-
-	sleep(2);
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-/*
- * Writer thread body.
- *
- * arg carries the writer index (cast to unsigned long) selecting the
- * writer_time[] slot this thread accumulates into. Returns (void *)2.
- */
-void *thr_writer(void *arg)
-{
-	int i, j;
-	long tidx;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
-		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			/* Only the lock/update/unlock sequence is timed. */
-			time1 = get_cycles();
-			/* Take every reader's lock, in ascending order. */
-			for (tidx = 0; tidx < NR_READ; tidx++) {
-				pthread_mutex_lock(&per_thread_lock[tidx].lock);
-			}
-			test_array.a = 8;
-			/* Release them in the reverse (descending) order. */
-			for (tidx = NR_READ - 1; tidx >= 0; tidx--) {
-				pthread_mutex_unlock(&per_thread_lock[tidx].lock);
-			}
-			time2 = get_cycles();
-			/*
-			 * Accumulates with "+=": the slot has to hold 0 when
-			 * the thread starts for the total to be meaningful.
-			 */
-			writer_time[(unsigned long)arg] += time2 - time1;
-			usleep(1);
-		}
-	}
-
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	return ((void*)2);
-}
-
-/*
- * Per-thread-lock timing test entry point.
- *
- * Usage: <program> nr_readers nr_writers
- *
- * Starts the reader and writer threads, joins them and prints the average
- * number of cycles per read and per write. Exits with status -1 on a usage
- * error and 1 when a thread cannot be created or joined; returns 0 otherwise.
- */
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	int i;
-	cycles_t tot_rtime = 0;
-	cycles_t tot_wtime = 0;
-
-	/* Both argv[1] (nr_readers) and argv[2] (nr_writers) are read below. */
-	if (argc < 3) {
-		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
-		exit(-1);
-	}
-	num_read = atoi(argv[1]);
-	num_write = atoi(argv[2]);
-
-	/*
-	 * Zero-filled on purpose: thr_writer() accumulates into writer_time[]
-	 * with "+=", so the slots must not start from uninitialized memory.
-	 */
-	reader_time = calloc(num_read, sizeof(*reader_time));
-	writer_time = calloc(num_write, sizeof(*writer_time));
-	tid_reader = malloc(sizeof(*tid_reader) * num_read);
-	tid_writer = malloc(sizeof(*tid_writer) * num_write);
-
-	printf("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	per_thread_lock = malloc(sizeof(struct per_thread_lock) * NR_READ);
-
-	/* One mutex per reader, initialized before any thread is started. */
-	for (i = 0; i < NR_READ; i++) {
-		pthread_mutex_init(&per_thread_lock[i].lock, NULL);
-	}
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	sleep(10);
-
-	/* The per-thread timings are only read once the thread is joined. */
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_rtime += reader_time[i];
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_wtime += writer_time[i];
-	}
-	printf("Time per read : %g cycles\n",
-	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
-	printf("Time per write : %g cycles\n",
-	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
-	free(per_thread_lock);
-
-	free(reader_time);
-	free(writer_time);
-	free(tid_reader);
-	free(tid_writer);
-
-	return 0;
-}
diff --git a/test_qsbr.c b/test_qsbr.c
deleted file mode 100644
index 97b918d..0000000
--- a/test_qsbr.c
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * test_urcu.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#define _LGPL_SOURCE
-#include "urcu-qsbr.h"
-
-struct test_array {
-	int a;
-};
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static struct test_array *test_rcu_pointer;
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-/*
- * malloc/free are reusing memory areas too quickly, which does not let us
- * test races appropriately. Use a large circular array for allocations.
- * ARRAY_SIZE is larger than nr_writers, which insures we never run over our tail.
- */
-#define ARRAY_SIZE (1048576 * nr_writers)
-#define ARRAY_POISON 0xDEADBEEF
-static int array_index;
-static struct test_array *test_array;
-
-static struct test_array *test_array_alloc(void)
-{
-	struct test_array *ret;
-	int index;
-
-	rcu_copy_mutex_lock();
-	index = array_index % ARRAY_SIZE;
-	assert(test_array[index].a == ARRAY_POISON ||
-		test_array[index].a == 0);
-	ret = &test_array[index];
-	array_index++;
-	if (array_index == ARRAY_SIZE)
-		array_index = 0;
-	rcu_copy_mutex_unlock();
-	return ret;
-}
-
-static void test_array_free(struct test_array *ptr)
-{
-	if (!ptr)
-		return;
-	rcu_copy_mutex_lock();
-	ptr->a = ARRAY_POISON;
-	rcu_copy_mutex_unlock();
-}
-
-void *thr_reader(void *_count)
-{
-	unsigned long long *count = _count;
-	struct test_array *local_ptr;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	rcu_register_thread();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		_rcu_read_lock();
-		local_ptr = _rcu_dereference(test_rcu_pointer);
-		debug_yield_read();
-		if (local_ptr)
-			assert(local_ptr->a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		_rcu_read_unlock();
-		nr_reads++;
-		/* QS each 1024 reads */
-		if (unlikely((nr_reads & ((1 << 10) - 1)) == 0))
-			_rcu_quiescent_state();
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	rcu_unregister_thread();
-
-	*count = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *_count)
-{
-	unsigned long long *count = _count;
-	struct test_array *new, *old;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		new = test_array_alloc();
-		new->a = 8;
-		old = _rcu_publish_content(&test_rcu_pointer, new);
-		/* can be done after unlock */
-		if (old)
-			old->a = 0;
-		test_array_free(old);
-		nr_writes++;
-		if (unlikely(!test_duration_write()))
-			break;
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	*count = nr_writes;
-	return ((void*)2);
-}
-
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	printf(" [-d delay] (writer period (us))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long *count_reader, *count_writer;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	test_array = malloc(sizeof(*test_array) * ARRAY_SIZE);
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	count_reader = malloc(sizeof(*count_reader) * nr_readers);
-	count_writer = malloc(sizeof(*count_writer) * nr_writers);
-
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     &count_reader[i]);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     &count_writer[i]);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += count_reader[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += count_writer[i];
-	}
-	
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes);
-	test_array_free(test_rcu_pointer);
-	free(test_array);
-	free(tid_reader);
-	free(tid_writer);
-	free(count_reader);
-	free(count_writer);
-	return 0;
-}
diff --git a/test_qsbr_gc.c b/test_qsbr_gc.c
deleted file mode 100644
index 3a66a84..0000000
--- a/test_qsbr_gc.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/*
- * test_urcu_gc.c
- *
- * Userspace RCU library - test program (with baatch reclamation)
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#define _LGPL_SOURCE
-#include "urcu-qsbr.h"
-
-struct test_array {
-	int a;
-};
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static struct test_array *test_rcu_pointer;
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-static unsigned int reclaim_batch = 1;
-
-struct reclaim_queue {
-	void **queue;	/* Beginning of queue */
-	void **head;	/* Insert position */
-};
-
-static struct reclaim_queue *pending_reclaims;
-
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
-
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-void *thr_reader(void *_count)
-{
-	unsigned long long *count = _count;
-	struct test_array *local_ptr;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	rcu_register_thread();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		_rcu_read_lock();
-		local_ptr = _rcu_dereference(test_rcu_pointer);
-		debug_yield_read();
-		if (local_ptr)
-			assert(local_ptr->a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		_rcu_read_unlock();
-		nr_reads++;
-		/* QS each 1024 reads */
-		if (unlikely((nr_reads & ((1 << 10) - 1)) == 0))
-			_rcu_quiescent_state();
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	rcu_unregister_thread();
-
-	*count = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-static void rcu_gc_clear_queue(unsigned long wtidx)
-{
-	void **p;
-
-	/* Wait for Q.S and empty queue */
-	synchronize_rcu();
-
-	for (p = pending_reclaims[wtidx].queue;
-			p < pending_reclaims[wtidx].head; p++) {
-		/* poison */
-		if (*p)
-			((struct test_array *)*p)->a = 0;
-		free(*p);
-	}
-	pending_reclaims[wtidx].head = pending_reclaims[wtidx].queue;
-}
-
-/* Using per-thread queue */
-static void rcu_gc_reclaim(unsigned long wtidx, void *old)
-{
-	/* Queue pointer */
-	*pending_reclaims[wtidx].head = old;
-	pending_reclaims[wtidx].head++;
-
-	if (likely(pending_reclaims[wtidx].head - pending_reclaims[wtidx].queue
-			< reclaim_batch))
-		return;
-
-	rcu_gc_clear_queue(wtidx);
-}
-
-void *thr_writer(void *data)
-{
-	unsigned long wtidx = (unsigned long)data;
-#ifdef TEST_LOCAL_GC
-	struct test_array *old = NULL;
-#else
-	struct test_array *new, *old;
-#endif
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-#ifndef TEST_LOCAL_GC
-		new = malloc(sizeof(*new));
-		new->a = 8;
-		old = _rcu_xchg_pointer(&test_rcu_pointer, new);
-#endif
-		rcu_gc_reclaim(wtidx, old);
-		nr_writes++;
-		if (unlikely(!test_duration_write()))
-			break;
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	tot_nr_writes[wtidx] = nr_writes;
-	return ((void*)2);
-}
-
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	printf(" [-d delay] (writer period (us))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long *count_reader;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'b':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			reclaim_batch = atol(argv[++i]);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	count_reader = malloc(sizeof(*count_reader) * nr_readers);
-	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
-	pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
-	if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
-			< CACHE_LINE_SIZE)
-		for (i = 0; i < nr_writers; i++)
-			pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
-	else
-		for (i = 0; i < nr_writers; i++)
-			pending_reclaims[i].queue = calloc(reclaim_batch,
-					sizeof(*pending_reclaims[i].queue));
-	for (i = 0; i < nr_writers; i++)
-		pending_reclaims[i].head = pending_reclaims[i].queue;
-
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     &count_reader[i]);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += count_reader[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += tot_nr_writes[i];
-		rcu_gc_clear_queue(i);
-	}
-	
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu "
-		"batch %u\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes, reclaim_batch);
-	free(tid_reader);
-	free(tid_writer);
-	free(count_reader);
-	free(tot_nr_writes);
-	for (i = 0; i < nr_writers; i++)
-		free(pending_reclaims[i].queue);
-	free(pending_reclaims);
-
-	return 0;
-}
diff --git a/test_qsbr_timing.c b/test_qsbr_timing.c
deleted file mode 100644
index e46a435..0000000
--- a/test_qsbr_timing.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * test_qsbr_timing.c
- *
- * Userspace QSBR - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <arch.h>
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#define _LGPL_SOURCE
-#include "urcu-qsbr.h"
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-struct test_array {
-	int a;
-};
-
-static struct test_array *test_rcu_pointer;
-
-#define OUTER_READ_LOOP	2000U
-#define INNER_READ_LOOP	100000U
-#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
-
-#define OUTER_WRITE_LOOP 10U
-#define INNER_WRITE_LOOP 200U
-#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
-
-static int num_read;
-static int num_write;
-
-#define NR_READ num_read
-#define NR_WRITE num_write
-
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
-
-void *thr_reader(void *arg)
-{
-	int i, j;
-	struct test_array *local_ptr;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	rcu_register_thread();
-
-	time1 = get_cycles();
-	for (i = 0; i < OUTER_READ_LOOP; i++) {
-		for (j = 0; j < INNER_READ_LOOP; j++) {
-			_rcu_read_lock();
-			local_ptr = _rcu_dereference(test_rcu_pointer);
-			if (local_ptr) {
-				assert(local_ptr->a == 8);
-			}
-			_rcu_read_unlock();
-		}
-		_rcu_quiescent_state();
-	}
-	time2 = get_cycles();
-
-	rcu_unregister_thread();
-
-	reader_time[(unsigned long)arg] = time2 - time1;
-
-	sleep(2);
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *arg)
-{
-	int i, j;
-	struct test_array *new, *old;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
-		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
-			new = malloc(sizeof(struct test_array));
-			rcu_copy_mutex_lock();
-			old = test_rcu_pointer;
-			if (old) {
-				assert(old->a == 8);
-			}
-			new->a = 8;
-			old = _rcu_publish_content(&test_rcu_pointer, new);
-			rcu_copy_mutex_unlock();
-			/* can be done after unlock */
-			if (old) {
-				old->a = 0;
-			}
-			free(old);
-			time2 = get_cycles();
-			writer_time[(unsigned long)arg] += time2 - time1;
-			usleep(1);
-		}
-	}
-
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	return ((void*)2);
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	int i;
-	cycles_t tot_rtime = 0;
-	cycles_t tot_wtime = 0;
-
-	if (argc < 2) {
-		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
-		exit(-1);
-	}
-	num_read = atoi(argv[1]);
-	num_write = atoi(argv[2]);
-
-	reader_time = malloc(sizeof(*reader_time) * num_read);
-	writer_time = malloc(sizeof(*writer_time) * num_write);
-	tid_reader = malloc(sizeof(*tid_reader) * num_read);
-	tid_writer = malloc(sizeof(*tid_writer) * num_write);
-
-	printf("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	sleep(10);
-
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_rtime += reader_time[i];
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_wtime += writer_time[i];
-	}
-	free(test_rcu_pointer);
-	printf("Time per read : %g cycles\n",
-	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
-	printf("Time per write : %g cycles\n",
-	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
-
-	free(reader_time);
-	free(writer_time);
-	free(tid_reader);
-	free(tid_writer);
-
-	return 0;
-}
diff --git a/test_rwlock.c b/test_rwlock.c
deleted file mode 100644
index 4cc4c07..0000000
--- a/test_rwlock.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * test_urcu.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#ifndef DYNAMIC_LINK_TEST
-#define _LGPL_SOURCE
-#else
-#define debug_yield_read()
-#endif
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static volatile struct test_array test_array = { 8 };
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-void *thr_reader(void *_count)
-{
-	unsigned long long *count = _count;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-
-	for (;;) {
-		pthread_rwlock_rdlock(&lock);
-		assert(test_array.a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		pthread_rwlock_unlock(&lock);
-		nr_reads++;
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	*count = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *_count)
-{
-	unsigned long long *count = _count;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		pthread_rwlock_wrlock(&lock);
-		test_array.a = 0;
-		test_array.a = 8;
-		pthread_rwlock_unlock(&lock);
-		nr_writes++;
-		if (unlikely(!test_duration_write()))
-			break;
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	*count = nr_writes;
-	return ((void*)2);
-}
-
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	printf(" [-d delay] (writer period (us))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long *count_reader, *count_writer;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	smp_mb();
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	count_reader = malloc(sizeof(*count_reader) * nr_readers);
-	count_writer = malloc(sizeof(*count_writer) * nr_writers);
-
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     &count_reader[i]);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     &count_writer[i]);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += count_reader[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += count_writer[i];
-	}
-
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes);
-
-	free(tid_reader);
-	free(tid_writer);
-	free(count_reader);
-	free(count_writer);
-	return 0;
-}
diff --git a/test_rwlock_timing.c b/test_rwlock_timing.c
deleted file mode 100644
index 4884e4d..0000000
--- a/test_rwlock_timing.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * test_urcu.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <pthread.h>
-#include <arch.h>
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
-
-static struct test_array test_array = { 8 };
-
-#define OUTER_READ_LOOP	200U
-#define INNER_READ_LOOP	100000U
-#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
-
-#define OUTER_WRITE_LOOP 10U
-#define INNER_WRITE_LOOP 200U
-#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
-
-static int num_read;
-static int num_write;
-
-#define NR_READ num_read
-#define NR_WRITE num_write
-
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
-
-void *thr_reader(void *arg)
-{
-	int i, j;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	time1 = get_cycles();
-	for (i = 0; i < OUTER_READ_LOOP; i++) {
-		for (j = 0; j < INNER_READ_LOOP; j++) {
-			pthread_rwlock_rdlock(&lock);
-			assert(test_array.a == 8);
-			pthread_rwlock_unlock(&lock);
-		}
-	}
-	time2 = get_cycles();
-
-	reader_time[(unsigned long)arg] = time2 - time1;
-
-	sleep(2);
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *arg)
-{
-	int i, j;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
-		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
-			pthread_rwlock_wrlock(&lock);
-			test_array.a = 8;
-			pthread_rwlock_unlock(&lock);
-			time2 = get_cycles();
-			writer_time[(unsigned long)arg] += time2 - time1;
-			usleep(1);
-		}
-	}
-
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	return ((void*)2);
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	int i;
-	cycles_t tot_rtime = 0;
-	cycles_t tot_wtime = 0;
-
-	if (argc < 2) {
-		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
-		exit(-1);
-	}
-	num_read = atoi(argv[1]);
-	num_write = atoi(argv[2]);
-
-	reader_time = malloc(sizeof(*reader_time) * num_read);
-	writer_time = malloc(sizeof(*writer_time) * num_write);
-	tid_reader = malloc(sizeof(*tid_reader) * num_read);
-	tid_writer = malloc(sizeof(*tid_writer) * num_write);
-
-	printf("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	sleep(10);
-
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_rtime += reader_time[i];
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_wtime += writer_time[i];
-	}
-	printf("Time per read : %g cycles\n",
-	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
-	printf("Time per write : %g cycles\n",
-	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
-
-	free(reader_time);
-	free(writer_time);
-	free(tid_reader);
-	free(tid_writer);
-
-	return 0;
-}
diff --git a/test_urcu.c b/test_urcu.c
deleted file mode 100644
index 3feda89..0000000
--- a/test_urcu.c
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * test_urcu.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#ifndef DYNAMIC_LINK_TEST
-#define _LGPL_SOURCE
-#else
-#define debug_yield_read()
-#endif
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static struct test_array *test_rcu_pointer;
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-/*
- * malloc/free are reusing memory areas too quickly, which does not let us
- * test races appropriately. Use a large circular array for allocations.
- * ARRAY_SIZE is larger than nr_writers, which insures we never run over our tail.
- */
-#define ARRAY_SIZE (1048576 * nr_writers)
-#define ARRAY_POISON 0xDEADBEEF
-static int array_index;
-static struct test_array *test_array;
-
-static struct test_array *test_array_alloc(void)
-{
-	struct test_array *ret;
-	int index;
-
-	rcu_copy_mutex_lock();
-	index = array_index % ARRAY_SIZE;
-	assert(test_array[index].a == ARRAY_POISON ||
-		test_array[index].a == 0);
-	ret = &test_array[index];
-	array_index++;
-	if (array_index == ARRAY_SIZE)
-		array_index = 0;
-	rcu_copy_mutex_unlock();
-	return ret;
-}
-
-static void test_array_free(struct test_array *ptr)
-{
-	if (!ptr)
-		return;
-	rcu_copy_mutex_lock();
-	ptr->a = ARRAY_POISON;
-	rcu_copy_mutex_unlock();
-}
-
-void *thr_reader(void *_count)
-{
-	unsigned long long *count = _count;
-	struct test_array *local_ptr;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	rcu_register_thread();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		rcu_read_lock();
-		local_ptr = rcu_dereference(test_rcu_pointer);
-		debug_yield_read();
-		if (local_ptr)
-			assert(local_ptr->a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		rcu_read_unlock();
-		nr_reads++;
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	rcu_unregister_thread();
-
-	*count = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *_count)
-{
-	unsigned long long *count = _count;
-	struct test_array *new, *old;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		new = test_array_alloc();
-		new->a = 8;
-		old = rcu_publish_content(&test_rcu_pointer, new);
-		if (old)
-			old->a = 0;
-		test_array_free(old);
-		nr_writes++;
-		if (unlikely(!test_duration_write()))
-			break;
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	*count = nr_writes;
-	return ((void*)2);
-}
-
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	printf(" [-d delay] (writer period (us))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long *count_reader, *count_writer;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	test_array = malloc(sizeof(*test_array) * ARRAY_SIZE);
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	count_reader = malloc(sizeof(*count_reader) * nr_readers);
-	count_writer = malloc(sizeof(*count_writer) * nr_writers);
-
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     &count_reader[i]);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     &count_writer[i]);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += count_reader[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += count_writer[i];
-	}
-	
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes);
-	test_array_free(test_rcu_pointer);
-	free(test_array);
-	free(tid_reader);
-	free(tid_writer);
-	free(count_reader);
-	free(count_writer);
-	return 0;
-}
diff --git a/test_urcu_gc.c b/test_urcu_gc.c
deleted file mode 100644
index 12ea181..0000000
--- a/test_urcu_gc.c
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
- * test_urcu_gc.c
- *
- * Userspace RCU library - test program (with baatch reclamation)
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sched.h>
-
-#include "arch.h"
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-/* hardcoded number of CPUs */
-#define NR_CPUS 16384
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#ifndef DYNAMIC_LINK_TEST
-#define _LGPL_SOURCE
-#else
-#define debug_yield_read()
-#endif
-#include "urcu.h"
-
-struct test_array {
-	int a;
-};
-
-static volatile int test_go, test_stop;
-
-static unsigned long wdelay;
-
-static struct test_array *test_rcu_pointer;
-
-static unsigned int reclaim_batch = 1;
-
-struct reclaim_queue {
-	void **queue;	/* Beginning of queue */
-	void **head;	/* Insert position */
-};
-
-static struct reclaim_queue *pending_reclaims;
-
-static unsigned long duration;
-
-/* read-side C.S. duration, in loops */
-static unsigned long rduration;
-
-static inline void loop_sleep(unsigned long l)
-{
-	while(l-- != 0)
-		cpu_relax();
-}
-
-static int verbose_mode;
-
-#define printf_verbose(fmt, args...)		\
-	do {					\
-		if (verbose_mode)		\
-			printf(fmt, args);	\
-	} while (0)
-
-static unsigned int cpu_affinities[NR_CPUS];
-static unsigned int next_aff = 0;
-static int use_affinity = 0;
-
-pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void set_affinity(void)
-{
-	cpu_set_t mask;
-	int cpu;
-	int ret;
-
-	if (!use_affinity)
-		return;
-
-	ret = pthread_mutex_lock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-	cpu = cpu_affinities[next_aff++];
-	ret = pthread_mutex_unlock(&affinity_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-	CPU_ZERO(&mask);
-	CPU_SET(cpu, &mask);
-	sched_setaffinity(0, sizeof(mask), &mask);
-}
-
-/*
- * returns 0 if test should end.
- */
-static int test_duration_write(void)
-{
-	return !test_stop;
-}
-
-static int test_duration_read(void)
-{
-	return !test_stop;
-}
-
-static unsigned long long __thread nr_writes;
-static unsigned long long __thread nr_reads;
-
-static
-unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
-
-static unsigned int nr_readers;
-static unsigned int nr_writers;
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-void *thr_reader(void *_count)
-{
-	unsigned long long *count = _count;
-	struct test_array *local_ptr;
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	rcu_register_thread();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-		rcu_read_lock();
-		local_ptr = rcu_dereference(test_rcu_pointer);
-		debug_yield_read();
-		if (local_ptr)
-			assert(local_ptr->a == 8);
-		if (unlikely(rduration))
-			loop_sleep(rduration);
-		rcu_read_unlock();
-		nr_reads++;
-		if (unlikely(!test_duration_read()))
-			break;
-	}
-
-	rcu_unregister_thread();
-
-	*count = nr_reads;
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-static void rcu_gc_clear_queue(unsigned long wtidx)
-{
-	void **p;
-
-	/* Wait for Q.S and empty queue */
-	synchronize_rcu();
-
-	for (p = pending_reclaims[wtidx].queue;
-			p < pending_reclaims[wtidx].head; p++) {
-		/* poison */
-		if (*p)
-			((struct test_array *)*p)->a = 0;
-		free(*p);
-	}
-	pending_reclaims[wtidx].head = pending_reclaims[wtidx].queue;
-}
-
-/* Using per-thread queue */
-static void rcu_gc_reclaim(unsigned long wtidx, void *old)
-{
-	/* Queue pointer */
-	*pending_reclaims[wtidx].head = old;
-	pending_reclaims[wtidx].head++;
-
-	if (likely(pending_reclaims[wtidx].head - pending_reclaims[wtidx].queue
-			< reclaim_batch))
-		return;
-
-	rcu_gc_clear_queue(wtidx);
-}
-
-void *thr_writer(void *data)
-{
-	unsigned long wtidx = (unsigned long)data;
-#ifdef TEST_LOCAL_GC
-	struct test_array *old = NULL;
-#else
-	struct test_array *new, *old;
-#endif
-
-	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-
-	set_affinity();
-
-	while (!test_go)
-	{
-	}
-	smp_mb();
-
-	for (;;) {
-#ifndef TEST_LOCAL_GC
-		new = malloc(sizeof(*new));
-		new->a = 8;
-		old = rcu_xchg_pointer(&test_rcu_pointer, new);
-#endif
-		rcu_gc_reclaim(wtidx, old);
-		nr_writes++;
-		if (unlikely(!test_duration_write()))
-			break;
-		if (unlikely(wdelay))
-			loop_sleep(wdelay);
-	}
-
-	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	tot_nr_writes[wtidx] = nr_writes;
-	return ((void*)2);
-}
-
-void show_usage(int argc, char **argv)
-{
-	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
-#ifdef DEBUG_YIELD
-	printf(" [-r] [-w] (yield reader and/or writer)");
-#endif
-	printf(" [-d delay] (writer period (us))");
-	printf(" [-c duration] (reader C.S. duration (in loops))");
-	printf(" [-v] (verbose output)");
-	printf(" [-a cpu#] [-a cpu#]... (affinity)");
-	printf("\n");
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	unsigned long long *count_reader;
-	unsigned long long tot_reads = 0, tot_writes = 0;
-	int i, a;
-
-	if (argc < 4) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[1], "%u", &nr_readers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	err = sscanf(argv[2], "%u", &nr_writers);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-	
-	err = sscanf(argv[3], "%lu", &duration);
-	if (err != 1) {
-		show_usage(argc, argv);
-		return -1;
-	}
-
-	for (i = 4; i < argc; i++) {
-		if (argv[i][0] != '-')
-			continue;
-		switch (argv[i][1]) {
-#ifdef DEBUG_YIELD
-		case 'r':
-			yield_active |= YIELD_READ;
-			break;
-		case 'w':
-			yield_active |= YIELD_WRITE;
-			break;
-#endif
-		case 'a':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			a = atoi(argv[++i]);
-			cpu_affinities[next_aff++] = a;
-			use_affinity = 1;
-			printf_verbose("Adding CPU %d affinity\n", a);
-			break;
-		case 'b':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			reclaim_batch = atol(argv[++i]);
-			break;
-		case 'c':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			rduration = atol(argv[++i]);
-			break;
-		case 'd':
-			if (argc < i + 2) {
-				show_usage(argc, argv);
-				return -1;
-			}
-			wdelay = atol(argv[++i]);
-			break;
-		case 'v':
-			verbose_mode = 1;
-			break;
-		}
-	}
-
-	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
-		duration, nr_readers, nr_writers);
-	printf_verbose("Writer delay : %lu loops.\n", wdelay);
-	printf_verbose("Reader duration : %lu loops.\n", rduration);
-	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
-	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
-	count_reader = malloc(sizeof(*count_reader) * nr_readers);
-	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
-	pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
-	if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
-			< CACHE_LINE_SIZE)
-		for (i = 0; i < nr_writers; i++)
-			pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
-	else
-		for (i = 0; i < nr_writers; i++)
-			pending_reclaims[i].queue = calloc(reclaim_batch,
-					sizeof(*pending_reclaims[i].queue));
-	for (i = 0; i < nr_writers; i++)
-		pending_reclaims[i].head = pending_reclaims[i].queue;
-
-	next_aff = 0;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     &count_reader[i]);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	smp_mb();
-
-	test_go = 1;
-
-	sleep(duration);
-
-	test_stop = 1;
-
-	for (i = 0; i < nr_readers; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_reads += count_reader[i];
-	}
-	for (i = 0; i < nr_writers; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_writes += tot_nr_writes[i];
-		rcu_gc_clear_queue(i);
-	}
-	
-	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
-	       tot_writes);
-	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
-		"nr_writers %3u "
-		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu "
-		"batch %u\n",
-		argv[0], duration, nr_readers, rduration,
-		nr_writers, wdelay, tot_reads, tot_writes,
-		tot_reads + tot_writes, reclaim_batch);
-	free(tid_reader);
-	free(tid_writer);
-	free(count_reader);
-	free(tot_nr_writes);
-	for (i = 0; i < nr_writers; i++)
-		free(pending_reclaims[i].queue);
-	free(pending_reclaims);
-
-	return 0;
-}
diff --git a/test_urcu_timing.c b/test_urcu_timing.c
deleted file mode 100644
index 8577b8f..0000000
--- a/test_urcu_timing.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * test_urcu.c
- *
- * Userspace RCU library - test program
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <arch.h>
-
-/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
-#define CACHE_LINE_SIZE 4096
-
-#if defined(_syscall0)
-_syscall0(pid_t, gettid)
-#elif defined(__NR_gettid)
-static inline pid_t gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-#else
-#warning "use pid as tid"
-static inline pid_t gettid(void)
-{
-	return getpid();
-}
-#endif
-
-#define _LGPL_SOURCE
-#include "urcu.h"
-
-pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-void rcu_copy_mutex_lock(void)
-{
-	int ret;
-	ret = pthread_mutex_lock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex lock");
-		exit(-1);
-	}
-}
-
-void rcu_copy_mutex_unlock(void)
-{
-	int ret;
-
-	ret = pthread_mutex_unlock(&rcu_copy_mutex);
-	if (ret) {
-		perror("Error in pthread mutex unlock");
-		exit(-1);
-	}
-}
-
-struct test_array {
-	int a;
-};
-
-static struct test_array *test_rcu_pointer;
-
-#define OUTER_READ_LOOP	2000U
-#define INNER_READ_LOOP	100000U
-#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
-
-#define OUTER_WRITE_LOOP 10U
-#define INNER_WRITE_LOOP 200U
-#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
-
-static int num_read;
-static int num_write;
-
-#define NR_READ num_read
-#define NR_WRITE num_write
-
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
-static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
-
-void *thr_reader(void *arg)
-{
-	int i, j;
-	struct test_array *local_ptr;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	rcu_register_thread();
-
-	time1 = get_cycles();
-	for (i = 0; i < OUTER_READ_LOOP; i++) {
-		for (j = 0; j < INNER_READ_LOOP; j++) {
-			rcu_read_lock();
-			local_ptr = rcu_dereference(test_rcu_pointer);
-			if (local_ptr) {
-				assert(local_ptr->a == 8);
-			}
-			rcu_read_unlock();
-		}
-	}
-	time2 = get_cycles();
-
-	rcu_unregister_thread();
-
-	reader_time[(unsigned long)arg] = time2 - time1;
-
-	sleep(2);
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"reader", pthread_self(), (unsigned long)gettid());
-	return ((void*)1);
-
-}
-
-void *thr_writer(void *arg)
-{
-	int i, j;
-	struct test_array *new, *old;
-	cycles_t time1, time2;
-
-	printf("thread_begin %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	sleep(2);
-
-	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
-		for (j = 0; j < INNER_WRITE_LOOP; j++) {
-			time1 = get_cycles();
-			new = malloc(sizeof(struct test_array));
-			rcu_copy_mutex_lock();
-			old = test_rcu_pointer;
-			if (old) {
-				assert(old->a == 8);
-			}
-			new->a = 8;
-			old = rcu_publish_content(&test_rcu_pointer, new);
-			rcu_copy_mutex_unlock();
-			/* can be done after unlock */
-			if (old) {
-				old->a = 0;
-			}
-			free(old);
-			time2 = get_cycles();
-			writer_time[(unsigned long)arg] += time2 - time1;
-			usleep(1);
-		}
-	}
-
-	printf("thread_end %s, thread id : %lx, tid %lu\n",
-			"writer", pthread_self(), (unsigned long)gettid());
-	return ((void*)2);
-}
-
-int main(int argc, char **argv)
-{
-	int err;
-	pthread_t *tid_reader, *tid_writer;
-	void *tret;
-	int i;
-	cycles_t tot_rtime = 0;
-	cycles_t tot_wtime = 0;
-
-	if (argc < 2) {
-		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
-		exit(-1);
-	}
-	num_read = atoi(argv[1]);
-	num_write = atoi(argv[2]);
-
-	reader_time = malloc(sizeof(*reader_time) * num_read);
-	writer_time = malloc(sizeof(*writer_time) * num_write);
-	tid_reader = malloc(sizeof(*tid_reader) * num_read);
-	tid_writer = malloc(sizeof(*tid_writer) * num_write);
-
-	printf("thread %-6s, thread id : %lx, tid %lu\n",
-			"main", pthread_self(), (unsigned long)gettid());
-
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_create(&tid_reader[i], NULL, thr_reader,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_create(&tid_writer[i], NULL, thr_writer,
-				     (void *)(long)i);
-		if (err != 0)
-			exit(1);
-	}
-
-	sleep(10);
-
-	for (i = 0; i < NR_READ; i++) {
-		err = pthread_join(tid_reader[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_rtime += reader_time[i];
-	}
-	for (i = 0; i < NR_WRITE; i++) {
-		err = pthread_join(tid_writer[i], &tret);
-		if (err != 0)
-			exit(1);
-		tot_wtime += writer_time[i];
-	}
-	free(test_rcu_pointer);
-	printf("Time per read : %g cycles\n",
-	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
-	printf("Time per write : %g cycles\n",
-	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
-
-	free(reader_time);
-	free(writer_time);
-	free(tid_reader);
-	free(tid_writer);
-
-	return 0;
-}
diff --git a/tests/Makefile b/tests/Makefile
new file mode 100644
index 0000000..ab19097
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1 @@
+include Makefile.inc
diff --git a/tests/Makefile.inc b/tests/Makefile.inc
new file mode 100644
index 0000000..5fac462
--- /dev/null
+++ b/tests/Makefile.inc
@@ -0,0 +1,114 @@
+
+include ../Makefile.build.inc
+# api.h flavour: api_${ARCHTYPE}.h when ARCHTYPE occurs in "x86 ppc", else api_gcc.h (ARCHTYPE is not set here; presumably by the include above -- TODO confirm).
+ifeq ($(findstring ${ARCHTYPE},"x86 ppc"),)
+APIHEADER=api_gcc.h
+else
+APIHEADER=api_${ARCHTYPE}.h
+endif
+# LIBDIR is added to the include path and prefixes the urcu objects/headers listed below.
+LIBDIR=..
+
+CFLAGS+=-I ${LIBDIR}
+# Prerequisite bundles used by the test rules: one library object plus the header that goes with it.
+URCU_SIGNAL=${LIBDIR}/urcu.o ${LIBDIR}/urcu.h
+URCU_SIGNAL_YIELD=${LIBDIR}/urcu-yield.o ${LIBDIR}/urcu.h
+URCU_MB=${LIBDIR}/urcu-mb.o ${LIBDIR}/urcu.h
+URCU_QSBR=${LIBDIR}/urcu-qsbr.o ${LIBDIR}/urcu-qsbr.h
+
+all: test_urcu test_urcu_dynamic_link test_urcu_timing \
+	test_rwlock_timing test_rwlock test_perthreadlock_timing \
+	test_perthreadlock test_urcu_yield test_urcu_mb \
+	urcu-asm.S test_qsbr_timing test_qsbr urcu-asm.o urcutorture \
+	urcutorture-yield test_mutex test_looplen test_urcu_gc \
+	test_urcu_gc_mb test_qsbr_gc test_qsbr_lgc test_urcu_lgc \
+	test_urcu_lgc_mb
+
+api.h: $(APIHEADER)	# api.h is a plain copy of the selected API header
+	cp -f $< $@
+
+test_looplen: test_looplen.c ${LIBDIR}/urcu.h
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+
+test_urcu: test_urcu.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_urcu_gc: test_urcu_gc.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_urcu_lgc: test_urcu_gc.c ${URCU_SIGNAL}
+	$(CC) -DTEST_LOCAL_GC ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+
+test_urcu_mb: test_urcu.c ${URCU_MB}
+	$(CC) -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_urcu_gc_mb: test_urcu_gc.c ${URCU_MB}
+	$(CC) -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_urcu_lgc_mb: test_urcu_gc.c ${URCU_MB}
+	$(CC) -DTEST_LOCAL_GC -DCONFIG_URCU_AVOID_SIGNALS ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+
+test_qsbr: test_qsbr.c ${URCU_QSBR}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_qsbr_gc: test_qsbr_gc.c ${URCU_QSBR}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_qsbr_lgc: test_qsbr_gc.c ${URCU_QSBR}
+	$(CC) -DTEST_LOCAL_GC ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+
+test_rwlock: test_rwlock.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_perthreadlock: test_perthreadlock.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_mutex: test_mutex.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_urcu_dynamic_link: test_urcu.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} -DDYNAMIC_LINK_TEST $(LDFLAGS) -o $@ $(SRC_DEP)
+
+
+test_urcu_yield: test_urcu.c ${URCU_SIGNAL_YIELD}
+	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_urcu_timing: test_urcu_timing.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_qsbr_timing: test_qsbr_timing.c ${URCU_QSBR}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_rwlock_timing: test_rwlock_timing.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+test_perthreadlock_timing: test_perthreadlock_timing.c ${URCU_SIGNAL}
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+urcu-asm.S: urcu-asm.c ${LIBDIR}/urcu.h
+	$(CC) ${CFLAGS} -S -o $@ $(SRC_DEP)
+
+urcu-asm.o: urcu-asm.c ${LIBDIR}/urcu.h
+	$(CC) ${CFLAGS} -c -o $@ $(SRC_DEP)
+
+urcutorture: urcutorture.c rcutorture.h api.h ${URCU_SIGNAL} 
+	$(CC) ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+urcutorture-yield: urcutorture.c ${URCU_SIGNAL_YIELD} rcutorture.h api.h
+	$(CC) -DDEBUG_YIELD ${CFLAGS} $(LDFLAGS) -o $@ $(SRC_DEP)
+
+.PHONY: all clean
+# Delete the build products of this directory: objects, test binaries, generated urcu-asm.S and api.h.
+clean:
+	rm -f *.o test_urcu test_urcu_dynamic_link test_urcu_timing \
+	test_rwlock_timing test_rwlock test_perthreadlock_timing \
+	test_perthreadlock test_urcu_yield test_urcu_mb \
+	urcu-asm.S test_qsbr_timing test_qsbr urcutorture \
+	urcutorture-yield liburcu.so api.h \
+	test_mutex test_urcu_gc test_urcu_gc_mb urcu-asm-1.S \
+	test_qsbr_lgc test_qsbr_gc test_looplen test_urcu_lgc \
+	test_urcu_lgc_mb
diff --git a/tests/Makefile64 b/tests/Makefile64
new file mode 100644
index 0000000..cca1cdc
--- /dev/null
+++ b/tests/Makefile64
@@ -0,0 +1,3 @@
+include Makefile.inc
+
+CFLAGS+=-m64
diff --git a/tests/api_gcc.h b/tests/api_gcc.h
new file mode 100644
index 0000000..1ad7345
--- /dev/null
+++ b/tests/api_gcc.h
@@ -0,0 +1,1328 @@
+#define _INCLUDE_API_H
+
+/*
+ * common.h: Common Linux kernel-isms.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; but version 2 of the License only due
+ * to code included from the Linux kernel.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ *
+ * Much code taken from the Linux kernel.  For such code, the option
+ * to redistribute under later versions of GPL might not be available.
+ */
+
+#ifndef __always_inline
+#define __always_inline inline
+#endif
+
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)   __VA_ARGS__
+#  define ASM_CONST(x)          x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...) #__VA_ARGS__
+#  define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)        x##UL
+#  define ASM_CONST(x)          __ASM_CONST(x)
+#endif
+
+
+/*
+ * arch-i386.h: Expose x86 atomic instructions.  80486 and better only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, but version 2 only due to inclusion
+ * of Linux-kernel code.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ *
+ * Much code taken from the Linux kernel.  For such code, the option
+ * to redistribute under later versions of GPL might not be available.
+ */
+
+/*
+ * Machine parameters.
+ */
+
+#define CACHE_LINE_SIZE 64
+#define ____cacheline_internodealigned_in_smp \
+	__attribute__((__aligned__(1 << 6)))
+
+#define LOCK_PREFIX "lock ; "
+
+/*
+ * Atomic data structure, initialization, and access.
+ */
+
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i)  { (i) }
+
+#define atomic_read(v)		((v)->counter)
+#define atomic_set(v, i)	(((v)->counter) = (i))
+
+/*
+ * Atomic operations.
+ */
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically adds @i to @v.
+ */
+
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+	(void)__sync_fetch_and_add(&v->counter, i);
+}
+
+/**
+ * atomic_sub - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+	(void)__sync_fetch_and_add(&v->counter, -i);
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+	return __sync_add_and_fetch(&v->counter, -i) == 0;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+	(void)__sync_fetch_and_add(&v->counter, 1);
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1.
+ */ 
+static __inline__ void atomic_dec(atomic_t *v)
+{
+	(void)__sync_fetch_and_add(&v->counter, -1);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */ 
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+	return __sync_add_and_fetch(&v->counter, -1) == 0;
+}
+
+/**
+ * atomic_inc_and_test - increment and test 
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */ 
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+	return __sync_add_and_fetch(&v->counter, 1) == 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ * 
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */ 
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+	return __sync_add_and_fetch(&v->counter, i) < 0;
+}
+
+/**
+ * atomic_add_return - add and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static __inline__ int atomic_add_return(int i, atomic_t *v)
+{
+	return __sync_add_and_fetch(&v->counter, i);
+}
+
+static __inline__ int atomic_sub_return(int i, atomic_t *v)
+{
+	return atomic_add_return(-i,v);
+}
+
+static inline unsigned int
+cmpxchg(volatile long *ptr, long oldval, long newval)
+{
+	return __sync_val_compare_and_swap(ptr, oldval, newval);
+}
+/* CAS directly on the int counter: routing it through cmpxchg(volatile long *) makes the access long-sized, i.e. wider than the counter on LP64. */
+#define atomic_cmpxchg(v, old, new) ((int)__sync_val_compare_and_swap(&((v)->counter), (old), (new)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
+		c = old;					\
+	}							\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define atomic_inc_return(v)  (atomic_add_return(1,v))
+#define atomic_dec_return(v)  (atomic_sub_return(1,v))
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+/*
+ * api_pthreads.h: API mapping to pthreads environment.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.  However, please note that much
+ * of the code in this file derives from the Linux kernel, and that such
+ * code may not be available except under GPLv2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#define __USE_GNU
+#include <pthread.h>
+#include <sched.h>
+#include <sys/param.h>
+/* #include "atomic.h" */
+
+/*
+ * Compiler magic.
+ */
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Default machine parameters.
+ */
+
+#ifndef CACHE_LINE_SIZE
+#define CACHE_LINE_SIZE 128
+#endif /* #ifndef CACHE_LINE_SIZE */
+
+/*
+ * Exclusive locking primitives.
+ */
+
+typedef pthread_mutex_t spinlock_t;
+
+#define DEFINE_SPINLOCK(lock) spinlock_t lock = PTHREAD_MUTEX_INITIALIZER;
+#define __SPIN_LOCK_UNLOCKED(lockp) PTHREAD_MUTEX_INITIALIZER
+
+static void spin_lock_init(spinlock_t *sp)
+{
+	if (pthread_mutex_init(sp, NULL) != 0) {
+		perror("spin_lock_init:pthread_mutex_init");
+		exit(-1);
+	}
+}
+
+static void spin_lock(spinlock_t *sp)
+{
+	if (pthread_mutex_lock(sp) != 0) {
+		perror("spin_lock:pthread_mutex_lock");
+		exit(-1);
+	}
+}
+
+static void spin_unlock(spinlock_t *sp)
+{
+	if (pthread_mutex_unlock(sp) != 0) {
+		perror("spin_unlock:pthread_mutex_unlock");
+		exit(-1);
+	}
+}
+
+#define spin_lock_irqsave(l, f) do { f = 1; spin_lock(l); } while (0)
+#define spin_unlock_irqrestore(l, f) do { f = 0; spin_unlock(l); } while (0)
+
+/*
+ * Thread creation/destruction primitives.
+ */
+
+typedef pthread_t thread_id_t;
+
+#define NR_THREADS 128
+
+#define __THREAD_ID_MAP_EMPTY 0
+#define __THREAD_ID_MAP_WAITING 1
+thread_id_t __thread_id_map[NR_THREADS];
+spinlock_t __thread_id_map_mutex;
+
+#define for_each_thread(t) \
+	for (t = 0; t < NR_THREADS; t++)
+
+#define for_each_running_thread(t) \
+	for (t = 0; t < NR_THREADS; t++) \
+		if ((__thread_id_map[t] != __THREAD_ID_MAP_EMPTY) && \
+		    (__thread_id_map[t] != __THREAD_ID_MAP_WAITING))
+
+pthread_key_t thread_id_key;
+
+static int __smp_thread_id(void)
+{
+	int i;
+	thread_id_t tid = pthread_self();
+	/* Called by smp_thread_id() on a TSD miss: find the caller's slot in __thread_id_map and cache slot + 1. */
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == tid) {
+			long v = i + 1;  /* must be non-NULL. */
+
+			if (pthread_setspecific(thread_id_key, (void *)v) != 0) {
+				perror("pthread_setspecific");
+				exit(-1);
+			}
+			return i;
+		}
+	}
+	spin_lock(&__thread_id_map_mutex);	/* unlocked scan missed: rescan under the map lock */
+	for (i = 0; i < NR_THREADS; i++)
+		if (__thread_id_map[i] == tid)
+			break;
+	spin_unlock(&__thread_id_map_mutex);	/* dropped on every path, hit or miss */
+	if (i < NR_THREADS)
+		return i;
+	fprintf(stderr, "smp_thread_id: Rogue thread, id: %d(%#x)\n",
+			(int)tid, (int)tid);
+	exit(-1);
+}
+
+static int smp_thread_id(void)
+{
+	/* The thread-specific slot holds index + 1, so NULL means nothing is cached yet. */
+	void *cached = pthread_getspecific(thread_id_key);
+
+	if (cached != NULL)
+		return (long)cached - 1;
+	return __smp_thread_id();	/* miss: look the thread up in the id map */
+}
+
+static thread_id_t create_thread(void *(*func)(void *), void *arg)
+{
+	thread_id_t tid;
+	int i;
+	/* Claim a free slot in __thread_id_map, start func(arg) in a new thread, record its tid in that slot. */
+	spin_lock(&__thread_id_map_mutex);
+	for (i = 0; i < NR_THREADS; i++)
+		if (__thread_id_map[i] == __THREAD_ID_MAP_EMPTY)
+			break;
+	if (i >= NR_THREADS) {
+		spin_unlock(&__thread_id_map_mutex);
+		fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
+		exit(-1);
+	}
+	__thread_id_map[i] = __THREAD_ID_MAP_WAITING;
+	/* Hold the lock until the tid is stored: the new thread may look itself up under this lock right away. */
+	if (pthread_create(&tid, NULL, func, arg) != 0) {
+		perror("create_thread:pthread_create");
+		exit(-1);
+	}
+	__thread_id_map[i] = tid;
+	spin_unlock(&__thread_id_map_mutex);
+	return tid;
+}
+
+static void *wait_thread(thread_id_t tid)
+{
+	void *retval;
+	int slot = 0;
+
+	/* Locate the map slot that holds tid; an unknown tid is fatal. */
+	while (slot < NR_THREADS && __thread_id_map[slot] != tid)
+		slot++;
+	if (slot >= NR_THREADS) {
+		fprintf(stderr, "wait_thread: bad tid = %d(%#x)\n",
+				(int)tid, (int)tid);
+		exit(-1);
+	}
+	/* Join the thread, then mark its slot empty again. */
+	if (pthread_join(tid, &retval) != 0) {
+		perror("wait_thread:pthread_join");
+		exit(-1);
+	}
+	__thread_id_map[slot] = __THREAD_ID_MAP_EMPTY;
+	return retval;
+}
+
+static void wait_all_threads(void)
+{
+	int slot;
+
+	/* Join every thread recorded in slots 1..NR_THREADS-1; slot 0 is never joined. */
+	for (slot = 1; slot < NR_THREADS; slot++) {
+		thread_id_t tid = __thread_id_map[slot];
+		if (tid == __THREAD_ID_MAP_EMPTY || tid == __THREAD_ID_MAP_WAITING)
+			continue;
+		(void)wait_thread(tid);
+	}
+}
+
+static void run_on(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+/*
+ * timekeeping -- very crude -- should use MONOTONIC...
+ */
+
+long long get_microseconds(void)
+{
+	struct timeval tv;
+
+	if (gettimeofday(&tv, NULL) != 0)
+		abort();
+	return ((long long)tv.tv_sec) * 1000000LL + (long long)tv.tv_usec;
+}
+
+/*
+ * Per-thread variables.
+ */
+
+#define DEFINE_PER_THREAD(type, name) \
+	struct { \
+		__typeof__(type) v \
+			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+	} __per_thread_##name[NR_THREADS];
+#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
+
+#define per_thread(name, thread) __per_thread_##name[thread].v
+#define __get_thread_var(name) per_thread(name, smp_thread_id())
+
+#define init_per_thread(name, v) \
+	do { \
+		int __i_p_t_i; \
+		for (__i_p_t_i = 0; __i_p_t_i < NR_THREADS; __i_p_t_i++) \
+			per_thread(name, __i_p_t_i) = v; \
+	} while (0)
+
+/*
+ * CPU traversal primitives.
+ */
+
+#ifndef NR_CPUS
+#define NR_CPUS 16
+#endif /* #ifndef NR_CPUS */
+
+#define for_each_possible_cpu(cpu) \
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+#define for_each_online_cpu(cpu) \
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+
+/*
+ * Per-CPU variables.
+ */
+
+#define DEFINE_PER_CPU(type, name) \
+	struct { \
+		__typeof__(type) v \
+			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+	} __per_cpu_##name[NR_CPUS]
+#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
+
+DEFINE_PER_THREAD(int, smp_processor_id);
+
+#define per_cpu(name, thread) __per_cpu_##name[thread].v
+#define __get_cpu_var(name) per_cpu(name, smp_processor_id())
+
+#define init_per_cpu(name, v) \
+	do { \
+		int __i_p_c_i; \
+		for (__i_p_c_i = 0; __i_p_c_i < NR_CPUS; __i_p_c_i++) \
+			per_cpu(name, __i_p_c_i) = v; \
+	} while (0)
+
+/*
+ * CPU state checking (crowbarred).
+ */
+
+#define idle_cpu(cpu) 0
+#define in_softirq() 1
+#define hardirq_count() 0
+#define PREEMPT_SHIFT   0
+#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define PREEMPT_BITS    8
+#define SOFTIRQ_BITS    8
+
+/*
+ * CPU hotplug.
+ */
+
+struct notifier_block {
+	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
+	struct notifier_block *next;
+	int priority;
+};
+
+#define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
+#define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
+#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
+#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
+#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
+#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
+#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
+				        * not handling interrupts, soon dead */
+#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+					* lock is dropped */
+
+/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
+ * operation in progress
+ */
+#define CPU_TASKS_FROZEN	0x0010
+
+#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)
+#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_UP_CANCELED_FROZEN	(CPU_UP_CANCELED | CPU_TASKS_FROZEN)
+#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
+#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
+
+/* Hibernation and suspend events */
+#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
+#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
+#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
+#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
+#define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
+#define PM_POST_RESTORE		0x0006 /* Restore failed */
+
+#define NOTIFY_DONE		0x0000		/* Don't care */
+#define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
+						/* Bad/Veto action */
+/*
+ * Clean way to return from the notifier and stop further calls.
+ */
+#define NOTIFY_STOP		(NOTIFY_OK|NOTIFY_STOP_MASK)
+
+/*
+ * Bug checks.  BUG_ON(c) aborts when c is false, i.e. it behaves like assert(c).
+ * NOTE(review): the Linux kernel's BUG_ON() fires when c is true -- confirm callers expect this.
+ */
+#define BUG_ON(c) do { if (!(c)) abort(); } while (0)
+
+/*
+ * Initialization -- Must be called before calling any primitives.
+ */
+
+static void smp_init(void)
+{
+	int slot;
+
+	spin_lock_init(&__thread_id_map_mutex);
+	__thread_id_map[0] = pthread_self();	/* slot 0 records the calling thread */
+	for (slot = 1; slot < NR_THREADS; ++slot)	/* all other slots start empty */
+		__thread_id_map[slot] = __THREAD_ID_MAP_EMPTY;
+	init_per_thread(smp_processor_id, 0);
+	if (pthread_key_create(&thread_id_key, NULL) != 0) {
+		perror("pthread_key_create");
+		exit(-1);
+	}
+}
+
+/* Taken from the Linux kernel source tree, so GPLv2-only!!! */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+#define LIST_POISON1  ((void *) 0x00100100)
+#define LIST_POISON2  ((void *) 0x00200200)
+
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+#else
+extern void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+#endif
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+#else
+extern void list_del(struct list_head *entry);
+#endif
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * If @old was empty, it will be overwritten.
+ */
+static inline void list_replace(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old,
+					struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	__list_del(list->prev, list->next);
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	__list_del(list->prev, list->next);
+	list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+				const struct list_head *head)
+{
+	return list->next == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+
+/**
+ * list_is_singular - tests whether a list has just one entry.
+ * @head: the list to test.
+ */
+static inline int list_is_singular(const struct list_head *head)
+{
+	return !list_empty(head) && (head->next == head->prev);
+}
+
+static inline void __list_cut_position(struct list_head *list,
+		struct list_head *head, struct list_head *entry)
+{
+	struct list_head *new_first = entry->next;
+	list->next = head->next;
+	list->next->prev = list;
+	list->prev = entry;
+	entry->next = list;
+	head->next = new_first;
+	new_first->prev = head;
+}
+
+/**
+ * list_cut_position - cut a list into two
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *	and if so we won't cut the list
+ *
+ * This helper moves the initial part of @head, up to and
+ * including @entry, from @head to @list. You should
+ * pass on @entry an element you know is on @head. @list
+ * should be an empty list or a list you do not care about
+ * losing its data.
+ *
+ */
+static inline void list_cut_position(struct list_head *list,
+		struct list_head *head, struct list_head *entry)
+{
+	if (list_empty(head))
+		return;
+	if (list_is_singular(head) &&
+		(head->next != entry && head != entry))
+		return;
+	if (entry == head)
+		INIT_LIST_HEAD(list);
+	else
+		__list_cut_position(list, head, entry);
+}
+
+static inline void __list_splice(const struct list_head *list,
+				 struct list_head *prev,
+				 struct list_head *next)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
+/**
+ * list_splice - join two lists, this is designed for stacks
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(const struct list_head *list,
+				struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head, head->next);
+}
+
+/**
+ * list_splice_tail - join two lists, each list being a queue
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice_tail(struct list_head *list,
+				struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head->prev, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head, head->next);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_splice_tail_init - join two lists and reinitialise the emptied list
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Each of the lists is a queue.
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_tail_init(struct list_head *list,
+					 struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head->prev, head);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr:	the list head to take the element from.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
+        	pos = pos->next)
+
+/**
+ * __list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev	-	iterate over a list backwards
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
+        	pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+/**
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ *
+ * @n always holds pos->prev as read before the loop body runs.
+ */
+#define list_for_each_prev_safe(pos, n, head) \
+	for (pos = (head)->prev, n = pos->prev; \
+	     prefetch(pos->prev), pos != (head); \
+	     pos = n, n = pos->prev)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * The loop ends when the list_head embedded in @pos is @head itself;
+ * at that point @pos does not refer to a real entry.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     prefetch(pos->member.next), &pos->member != (head); 	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Same as list_for_each_entry() but following the ->prev links.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     prefetch(pos->member.prev), &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
+ * @pos:	the type * to use as a start point
+ * @head:	the head of the list
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
+ * A non-NULL @pos is returned unchanged; a NULL @pos is replaced by the
+ * pseudo-entry computed from @head.  Relies on the GNU C "?:" form with
+ * the middle operand omitted.
+ */
+#define list_prepare_entry(pos, head, member) \
+	((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position (the entry @pos points at on entry is skipped).
+ */
+#define list_for_each_entry_continue(pos, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
+	     prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue_reverse - iterate backwards from the given point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Start to iterate over list of given type backwards, continuing after
+ * the current position (the entry @pos points at on entry is skipped).
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member)		\
+	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     prefetch(pos->member.prev), &pos->member != (head);	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position
+ * (the entry @pos points at on entry is visited first).
+ */
+#define list_for_each_entry_from(pos, head, member) 			\
+	for (; prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * @n is computed from @pos before the loop body runs, so the body may
+ * unlink @pos.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+		n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue - continue list iteration safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
+		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from - iterate over list from current point safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) 			\
+	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+/* List head: a single pointer to the first node, NULL when empty. */
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+/*
+ * List node.  @pprev points at whichever pointer currently points to
+ * this node: either the head's ->first or the previous node's ->next.
+ */
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+/* Static initializer, definition helper and run-time initializer for an empty hlist head. */
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+/* Put node @h into the "not on any list" state (both links NULL). */
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->pprev = NULL;
+	h->next = NULL;
+}
+
+/* Return non-zero when @h is not linked into a list (its pprev is NULL). */
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return h->pprev == NULL;
+}
+
+/* Return non-zero when list head @h has no first node. */
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return h->first == NULL;
+}
+
+/*
+ * Unlink @n from its list: the pointer that pointed at @n is redirected
+ * to @n's successor, and the successor (if any) gets @n's old pprev.
+ * @n's own links are left untouched.
+ */
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node **link = n->pprev;
+	struct hlist_node *succ = n->next;
+
+	*link = succ;
+	if (succ != NULL)
+		succ->pprev = link;
+}
+
+/*
+ * Unlink @n and overwrite both of its links with the LIST_POISON
+ * values, so the node no longer carries valid list pointers.
+ */
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+/*
+ * Unlink @n if it is currently on a list and reset it to the
+ * unhashed state; does nothing when @n is already unhashed.
+ */
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (hlist_unhashed(n))
+		return;
+
+	__hlist_del(n);
+	INIT_HLIST_NODE(n);
+}
+
+/* Insert @n at the front of the list headed by @h. */
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *old_first = h->first;
+
+	n->next = old_first;
+	if (old_first != NULL)
+		old_first->pprev = &n->next;	/* old first is now reached through n */
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/*
+ * Insert @n immediately before @next.
+ * next must be != NULL (it is dereferenced unconditionally).
+ */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	/* Redirect whatever used to point at @next so it points at @n. */
+	*(n->pprev) = n;
+}
+
+/*
+ * Insert @next immediately after @n.  Despite its name, @next is the
+ * node being added and @n is the node already on the list.
+ */
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	/* Fix the back-link of the node that now follows @next, if any. */
+	if(next->next)
+		next->next->pprev  = &next->next;
+}
+
+/*
+ * Transfer every node from @old to @new and leave @old empty.  The
+ * first node, when there is one, has its pprev re-aimed at @new's
+ * ->first.
+ */
+static inline void hlist_move_list(struct hlist_head *old,
+				   struct hlist_head *new)
+{
+	struct hlist_node *first_node = old->first;
+
+	new->first = first_node;
+	if (first_node != NULL)
+		first_node->pprev = &new->first;
+	old->first = NULL;
+}
+
+/* Convert a pointer to an embedded hlist_node into its containing struct. */
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+/* Iterate @pos over the nodes of @head; stops at the NULL ->next. */
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
+	     pos = pos->next)
+
+/*
+ * As hlist_for_each(), but @n caches pos->next before the body runs,
+ * so the body may unlink @pos.
+ */
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * @tpos is assigned inside the loop condition, only while @pos is
+ * non-NULL; it keeps its last value once the loop has finished.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)			 \
+	for (pos = (head)->first;					 \
+	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * @pos must be non-NULL on entry: the loop starts at (pos)->next.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)		 \
+	for (pos = (pos)->next;						 \
+	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * The node @pos points at on entry is visited first; a NULL @pos
+ * yields zero iterations.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)			 \
+	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * @n caches pos->next before the loop body runs.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
+	for (pos = (head)->first;					 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
diff --git a/tests/api_ppc.h b/tests/api_ppc.h
new file mode 100644
index 0000000..8a03faa
--- /dev/null
+++ b/tests/api_ppc.h
@@ -0,0 +1,1699 @@
+/* MECHANICALLY GENERATED, DO NOT EDIT!!! */
+
+#define _INCLUDE_API_H
+
+/*
+ * common.h: Common Linux kernel-isms.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; but version 2 of the License only due
+ * to code included from the Linux kernel.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ *
+ * Much code taken from the Linux kernel.  For such code, the option
+ * to redistribute under later versions of GPL might not be available.
+ */
+
+#ifndef __always_inline
+#define __always_inline inline
+#endif
+
+/*
+ * Compile-time assertions: a true condition makes the array size
+ * 1 - 2 = -1, which the compiler rejects.  BUILD_BUG_ON() is a void
+ * expression; BUILD_BUG_ON_ZERO() evaluates to 0 when the check passes.
+ */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
+
+/*
+ * stringify_in_c() passes its arguments through verbatim in assembly
+ * sources and turns them into a string literal (followed by " ") in C.
+ * ASM_CONST() appends the UL suffix in C only.
+ */
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)   __VA_ARGS__
+#  define ASM_CONST(x)          x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...) #__VA_ARGS__
+#  define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)        x##UL
+#  define ASM_CONST(x)          __ASM_CONST(x)
+#endif
+
+
+/*
+ * arch-ppc64.h: Expose PowerPC atomic instructions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; but version 2 of the License only due
+ * to code included from the Linux kernel.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ *
+ * Much code taken from the Linux kernel.  For such code, the option
+ * to redistribute under later versions of GPL might not be available.
+ */
+
+/*
+ * Machine parameters.
+ */
+
+/* Unconditionally enables the 64-bit (ldarx/stdcx.) primitives below. */
+#define CONFIG_PPC64
+
+/* 128-byte lines; the alignment attribute below uses the same value (1 << 7). */
+#define CACHE_LINE_SIZE 128
+#define ____cacheline_internodealigned_in_smp \
+	__attribute__((__aligned__(1 << 7)))
+
+/*
+ * Atomic data structure, initialization, and access.
+ */
+
+/* Atomic counter: a single volatile int wrapped in a struct. */
+typedef struct { volatile int counter; } atomic_t;
+
+/* Static initializer for an atomic_t. */
+#define ATOMIC_INIT(i)  { (i) }
+
+/* Plain load / store of the counter; no barrier is implied by these macros. */
+#define atomic_read(v)		((v)->counter)
+#define atomic_set(v, i)	(((v)->counter) = (i))
+
+/*
+ * Atomic operations.
+ */
+
+/*
+ * Barrier fragments spliced into the inline asm below.  They are only
+ * non-empty when CONFIG_SMP is defined; this header does not define it
+ * in the part visible here.  PPC405_ERR77() always expands to nothing.
+ */
+#define LWSYNC lwsync
+#define PPC405_ERR77(ra,rb)
+#ifdef CONFIG_SMP
+#  define LWSYNC_ON_SMP stringify_in_c(LWSYNC) "\n"
+#  define ISYNC_ON_SMP "\n\tisync\n"
+#else
+#  define LWSYNC_ON_SMP
+#  define ISYNC_ON_SMP
+#endif
+
+
+/*
+ * Atomic exchange (32-bit)
+ *
+ * Changes the 32-bit memory location '*p' to be val and returns
+ * the previous value stored there, using a lwarx/stwcx. retry loop
+ * bracketed by LWSYNC_ON_SMP and ISYNC_ON_SMP.
+ */
+static __always_inline unsigned long
+__xchg_u32(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+	LWSYNC_ON_SMP
+"1:	lwarx	%0,0,%2 \n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%3,0,%2 \n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+/*
+ * Atomic exchange (32-bit, "local" variant)
+ *
+ * Same lwarx/stwcx. loop as __xchg_u32(), but without the
+ * LWSYNC_ON_SMP / ISYNC_ON_SMP barrier fragments around it.
+ * Returns the previous value stored at '*p'.
+ */
+static __always_inline unsigned long
+__xchg_u32_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lwarx	%0,0,%2 \n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Atomic exchange (64-bit): stores val into the doubleword at '*p' with
+ * a ldarx/stdcx. retry loop bracketed by LWSYNC_ON_SMP / ISYNC_ON_SMP,
+ * and returns the previous value.
+ */
+static __always_inline unsigned long
+__xchg_u64(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+	LWSYNC_ON_SMP
+"1:	ldarx	%0,0,%2 \n"
+	PPC405_ERR77(0,%2)
+"	stdcx.	%3,0,%2 \n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+/*
+ * Atomic exchange (64-bit, "local" variant): the ldarx/stdcx. loop of
+ * __xchg_u64() without the barrier fragments.  Returns the previous value.
+ */
+static __always_inline unsigned long
+__xchg_u64_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	ldarx	%0,0,%2 \n"
+	PPC405_ERR77(0,%2)
+"	stdcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid xchg().
+ */
+extern void __xchg_called_with_bad_pointer(void);
+
+/*
+ * Size dispatcher behind xchg(): forwards to the 4-byte primitive, or
+ * to the 8-byte one when CONFIG_PPC64 is defined.  Any other size ends
+ * up calling the deliberately undefined
+ * __xchg_called_with_bad_pointer(), and x is handed back unchanged.
+ */
+static __always_inline unsigned long
+__xchg(volatile void *ptr, unsigned long x, unsigned int size)
+{
+	if (size == 4)
+		return __xchg_u32(ptr, x);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __xchg_u64(ptr, x);
+#endif
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+/*
+ * Size dispatcher behind xchg_local(): same selection as __xchg() but
+ * targeting the *_local primitives.  Unsupported sizes call the
+ * undefined __xchg_called_with_bad_pointer() and return x unchanged.
+ */
+static __always_inline unsigned long
+__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+{
+	if (size == 4)
+		return __xchg_u32_local(ptr, x);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __xchg_u64_local(ptr, x);
+#endif
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+/*
+ * xchg - atomically store x into *ptr and return the previous contents,
+ * cast back to the pointed-to type.  sizeof(*(ptr)) selects the
+ * primitive through __xchg().
+ */
+#define xchg(ptr,x)							     \
+  ({									     \
+     __typeof__(*(ptr)) _x_ = (x);					     \
+     (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
+  })
+
+/* xchg_local - as xchg(), but routed through __xchg_local(). */
+#define xchg_local(ptr,x)						     \
+  ({									     \
+     __typeof__(*(ptr)) _x_ = (x);					     \
+     (__typeof__(*(ptr))) __xchg_local((ptr),				     \
+     		(unsigned long)_x_, sizeof(*(ptr))); 			     \
+  })
+
+/*
+ * Compare and exchange - if *p == old, set it to new,
+ * and return the old value of *p.
+ */
+#define __HAVE_ARCH_CMPXCHG	1
+
+/*
+ * 32-bit compare-and-exchange: if the word at p equals old, store new.
+ * Returns the value that was loaded from *p either way.  On a mismatch
+ * the code branches to label 2 and skips both the store and ISYNC_ON_SMP.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	LWSYNC_ON_SMP
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%4,0,%2\n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+/*
+ * 32-bit compare-and-exchange, "local" variant: same loop as
+ * __cmpxchg_u32() without the LWSYNC_ON_SMP / ISYNC_ON_SMP fragments.
+ * Returns the value that was loaded from *p.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * 64-bit compare-and-exchange: if the doubleword at p equals old, store
+ * new.  Returns the value that was loaded from *p either way.
+ */
+static __always_inline unsigned long
+__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+	LWSYNC_ON_SMP
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
+	cmpd	0,%0,%3\n\
+	bne-	2f\n\
+	stdcx.	%4,0,%2\n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+/*
+ * 64-bit compare-and-exchange, "local" variant: same loop as
+ * __cmpxchg_u64() without the barrier fragments.
+ */
+static __always_inline unsigned long
+__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
+	cmpd	0,%0,%3\n\
+	bne-	2f\n\
+	stdcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
+
+/* This function doesn't exist, so you'll get a linker error
+   if something tries to do an invalid cmpxchg().  */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+/*
+ * Size dispatcher behind cmpxchg(): 4 bytes, or 8 bytes when
+ * CONFIG_PPC64 is defined.  Any other size calls the undefined
+ * __cmpxchg_called_with_bad_pointer() and returns old.
+ */
+static __always_inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
+{
+	if (size == 4)
+		return __cmpxchg_u32(ptr, old, new);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __cmpxchg_u64(ptr, old, new);
+#endif
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+/*
+ * Size dispatcher behind cmpxchg_local(): same selection as __cmpxchg()
+ * but targeting the *_local primitives.
+ */
+static __always_inline unsigned long
+__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
+{
+	if (size == 4)
+		return __cmpxchg_u32_local(ptr, old, new);
+#ifdef CONFIG_PPC64
+	if (size == 8)
+		return __cmpxchg_u64_local(ptr, old, new);
+#endif
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+/*
+ * cmpxchg - if *ptr equals o, atomically replace it with n.  Evaluates
+ * to the value found in *ptr, cast back to the pointed-to type.
+ */
+#define cmpxchg(ptr, o, n)						 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+
+/* cmpxchg_local - as cmpxchg(), but routed through __cmpxchg_local(). */
+#define cmpxchg_local(ptr, o, n)					 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_,	 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+#ifdef CONFIG_PPC64
+/*
+ * We handle most unaligned accesses in hardware. On the other hand 
+ * unaligned DMA can be very expensive on some ppc64 IO chips (it does
+ * powers of 2 writes until it reaches sufficient alignment).
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining
+ * cacheline alignment of buffers.
+ */
+#define NET_IP_ALIGN	0
+#define NET_SKB_PAD	L1_CACHE_BYTES
+
+/* cmpxchg() restricted at compile time to 8-byte objects. */
+#define cmpxchg64(ptr, o, n)						\
+  ({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg((ptr), (o), (n));					\
+  })
+/* cmpxchg_local() restricted at compile time to 8-byte objects. */
+#define cmpxchg64_local(ptr, o, n)					\
+  ({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg_local((ptr), (o), (n));					\
+  })
+#endif
+
+/* cmpxchg() / xchg() applied to the counter of an atomic_t. */
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @a: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @a to @v.  The asm contains no barrier instruction
+ * and does not clobber "memory".
+ */
+static __inline__ void atomic_add(int a, atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+	"1:	lwarx	%0,0,%3		# atomic_add\n\
+		add	%0,%2,%0 \n\
+		stwcx.	%0,0,%3 \n\
+		bne-	1b"
+		: "=&r" (t), "+m" (v->counter)
+		: "r" (a), "r" (&v->counter)
+		: "cc");
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @a: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @a from @v.  The asm contains no barrier
+ * instruction and does not clobber "memory".
+ */
+static __inline__ void atomic_sub(int a, atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+	"1:	lwarx	%0,0,%3		# atomic_sub \n\
+		subf	%0,%2,%0 \n\
+		stwcx.	%0,0,%3 \n\
+		bne-	1b"
+		: "=&r" (t), "+m" (v->counter)
+		: "r" (a), "r" (&v->counter)
+		: "cc");
+}
+
+/**
+ * atomic_sub_return - subtract and return
+ * @a: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @a from @v and returns the new value of @v.
+ * The update is preceded by lwsync and followed by isync.
+ */
+static __inline__ int atomic_sub_return(int a, atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+		"lwsync\n\
+	1:	lwarx	%0,0,%2		# atomic_sub_return\n\
+		subf	%0,%1,%0\n\
+		stwcx.	%0,0,%2 \n\
+		bne-	1b \n\
+		isync"
+		: "=&r" (t)
+		: "r" (a), "r" (&v->counter)
+		: "cc", "memory");
+
+	return t;
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @a: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @a from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int atomic_sub_and_test(int a, atomic_t *v)
+{
+	return atomic_sub_return(a, v) == 0;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1 (implemented as atomic_add(1, v)).
+ */
+static __inline__ void atomic_inc(atomic_t *v)
+{
+	atomic_add(1, v);
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 (implemented as atomic_sub(1, v)).
+ */
+static __inline__ void atomic_dec(atomic_t *v)
+{
+	atomic_sub(1, v);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+	return atomic_sub_and_test(1, v);
+}
+
+/*
+ * atomic_add_return() is defined further down in this header; declare
+ * it here so the call below binds to that static inline instead of an
+ * implicitly declared external function.
+ */
+static __inline__ int atomic_add_return(int a, atomic_t *v);
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+	/* Compare against zero: the raw new value is not the documented result. */
+	return atomic_add_return(1, v) == 0;
+}
+
+/**
+ * atomic_add_return - add and return
+ * @a: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @a to @v and returns the new value of @v.
+ * The update is preceded by lwsync and followed by isync.
+ */
+static __inline__ int atomic_add_return(int a, atomic_t *v)
+{
+	int t;
+
+	__asm__ __volatile__(
+		"lwsync \n\
+	1:	lwarx	%0,0,%2		 # atomic_add_return \n\
+		add	%0,%1,%0 \n\
+		stwcx.	%0,0,%2 \n\
+		bne-	1b \n\
+		isync"
+		: "=&r" (t)
+		: "r" (a), "r" (&v->counter)
+		: "cc", "memory");
+
+	return t;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @a: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @a to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static __inline__ int atomic_add_negative(int a, atomic_t *v)
+{
+	return atomic_add_return(a, v) < 0;
+}
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ *
+ * The counter is a 32-bit int loaded with lwarx, so it is compared
+ * with cmpw (32-bit compare).  A 64-bit cmpd would also compare the
+ * upper register halves and could miss a match when @u is negative.
+ */
+static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int t;
+
+	/*
+	 * On success the trailing subf turns t back into the old value,
+	 * so in both paths t holds what was read from the counter.
+	 */
+	__asm__ __volatile__(
+		"lwsync \n\
+	1:	lwarx	%0,0,%1		# atomic_add_unless\n\
+		cmpw	0,%0,%3 \n\
+		beq-	2f \n\
+		add	%0,%2,%0 \n\
+		stwcx.	%0,0,%1 \n\
+		bne-	1b \n\
+		isync \n\
+		subf	%0,%2,%0 \n\
+	2:"
+		: "=&r" (t)
+		: "r" (&v->counter), "r" (a), "r" (u)
+		: "cc", "memory");
+
+	return t != u;
+}
+
+/* Increment @v unless it is zero; non-zero result means the increment happened. */
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+/* Increment / decrement by one and yield the new value. */
+#define atomic_inc_return(v)  (atomic_add_return(1,v))
+#define atomic_dec_return(v)  (atomic_sub_return(1,v))
+
+/* atomic_inc()/atomic_dec() above contain no barrier instructions, so these map to full barriers. */
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__after_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_inc()	smp_mb()
+
+/*
+ * api_pthreads.h: API mapping to pthreads environment.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.  However, please note that much
+ * of the code in this file derives from the Linux kernel, and that such
+ * code may not be available except under GPLv2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#define __USE_GNU
+#include <pthread.h>
+#include <sched.h>
+#include <sys/param.h>
+#include <sys/time.h>
+/* #include "atomic.h" */
+
+/*
+ * Compiler magic.
+ */
+/* Byte offset of MEMBER inside TYPE, computed from a null TYPE pointer. */
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+/*
+ * Given a pointer to @member embedded in a @type, yield a pointer to
+ * the enclosing @type.  The __mptr temporary makes the compiler check
+ * that @ptr has the member's type.  Uses GNU statement expressions
+ * and typeof.
+ */
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Default machine parameters.
+ */
+
+#ifndef CACHE_LINE_SIZE
+#define CACHE_LINE_SIZE 128
+#endif /* #ifndef CACHE_LINE_SIZE */
+
+/*
+ * Exclusive locking primitives.
+ */
+
+/* "Spinlocks" in this environment are plain pthread mutexes. */
+typedef pthread_mutex_t spinlock_t;
+
+/* Note: DEFINE_SPINLOCK() already ends in a semicolon. */
+#define DEFINE_SPINLOCK(lock) spinlock_t lock = PTHREAD_MUTEX_INITIALIZER;
+#define __SPIN_LOCK_UNLOCKED(lockp) PTHREAD_MUTEX_INITIALIZER
+
+/*
+ * Initialize @sp with default mutex attributes.  Prints a diagnostic
+ * and exits the process on failure.
+ */
+static void spin_lock_init(spinlock_t *sp)
+{
+	int ret = pthread_mutex_init(sp, NULL);
+
+	if (ret != 0) {
+		/* pthread functions return the error number; perror() reads errno. */
+		errno = ret;
+		perror("spin_lock_init:pthread_mutex_init");
+		exit(-1);
+	}
+}
+
+/* Acquire @sp.  Prints a diagnostic and exits the process on failure. */
+static void spin_lock(spinlock_t *sp)
+{
+	int ret = pthread_mutex_lock(sp);
+
+	if (ret != 0) {
+		/* pthread functions return the error number; perror() reads errno. */
+		errno = ret;
+		perror("spin_lock:pthread_mutex_lock");
+		exit(-1);
+	}
+}
+
+/* Release @sp.  Prints a diagnostic and exits the process on failure. */
+static void spin_unlock(spinlock_t *sp)
+{
+	int ret = pthread_mutex_unlock(sp);
+
+	if (ret != 0) {
+		/* pthread functions return the error number; perror() reads errno. */
+		errno = ret;
+		perror("spin_unlock:pthread_mutex_unlock");
+		exit(-1);
+	}
+}
+
+/* There are no interrupts here: the flags argument is just set to 1 / 0 around the lock call. */
+#define spin_lock_irqsave(l, f) do { f = 1; spin_lock(l); } while (0)
+#define spin_unlock_irqrestore(l, f) do { f = 0; spin_unlock(l); } while (0)
+
+/*
+ * Thread creation/destruction primitives.
+ */
+
+typedef pthread_t thread_id_t;
+
+/* Number of slots in the thread-id map. */
+#define NR_THREADS 128
+
+/*
+ * Each map slot holds a pthread_t, or one of two sentinels:
+ * EMPTY (slot free) and WAITING (slot reserved by create_thread()
+ * before the new thread's id has been stored).
+ */
+#define __THREAD_ID_MAP_EMPTY 0
+#define __THREAD_ID_MAP_WAITING 1
+thread_id_t __thread_id_map[NR_THREADS];
+spinlock_t __thread_id_map_mutex;
+
+/* Iterate t over every slot index. */
+#define for_each_thread(t) \
+	for (t = 0; t < NR_THREADS; t++)
+
+/* Iterate t over slots holding a real thread id (expands to for + if). */
+#define for_each_running_thread(t) \
+	for (t = 0; t < NR_THREADS; t++) \
+		if ((__thread_id_map[t] != __THREAD_ID_MAP_EMPTY) && \
+		    (__thread_id_map[t] != __THREAD_ID_MAP_WAITING))
+
+/* As for_each_running_thread(), also storing the slot's id in tid. */
+#define for_each_tid(t, tid) \
+	for (t = 0; t < NR_THREADS; t++) \
+		if ((((tid) = __thread_id_map[t]) != __THREAD_ID_MAP_EMPTY) && \
+		    ((tid) != __THREAD_ID_MAP_WAITING))
+
+/* Thread-specific key caching (slot index + 1); see smp_thread_id(). */
+pthread_key_t thread_id_key;
+
+/*
+ * Slow path of smp_thread_id(): find the calling thread's slot in
+ * __thread_id_map.
+ *
+ * A first, unlocked scan caches (slot + 1) under thread_id_key on a hit
+ * and returns the slot.  If that misses, the scan is repeated with
+ * __thread_id_map_mutex held.  A thread found in neither scan is
+ * reported on stderr and the process exits.
+ */
+static int __smp_thread_id(void)
+{
+	int i;
+	int ret;
+	thread_id_t tid = pthread_self();
+
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == tid) {
+			long v = i + 1;  /* must be non-NULL. */
+
+			ret = pthread_setspecific(thread_id_key, (void *)v);
+			if (ret != 0) {
+				/* pthread functions return the error number; perror() reads errno. */
+				errno = ret;
+				perror("pthread_setspecific");
+				exit(-1);
+			}
+			return i;
+		}
+	}
+	spin_lock(&__thread_id_map_mutex);
+	for (i = 0; i < NR_THREADS; i++) {
+		/*
+		 * Braces are required: the unlock and the return must both
+		 * be conditional on a match, otherwise the first iteration
+		 * returns 0 with the mutex still held.
+		 */
+		if (__thread_id_map[i] == tid) {
+			spin_unlock(&__thread_id_map_mutex);
+			return i;
+		}
+	}
+	spin_unlock(&__thread_id_map_mutex);
+	fprintf(stderr, "smp_thread_id: Rogue thread, id: %d(%#x)\n",
+			(int)tid, (int)tid);
+	exit(-1);
+}
+
+/*
+ * Return the calling thread's slot index in __thread_id_map.
+ *
+ * The index is cached as (index + 1) under thread_id_key so that a
+ * cached slot 0 is distinguishable from "nothing cached" (NULL).  When
+ * nothing is cached, __smp_thread_id() performs the lookup.
+ */
+static int smp_thread_id(void)
+{
+	void *id;
+
+	id = pthread_getspecific(thread_id_key);
+	if (id == NULL)
+		return __smp_thread_id();
+	/* Convert first, then subtract: arithmetic on void * is a GNU extension. */
+	return (long)id - 1;
+}
+
+/*
+ * Start a thread running func(arg) and record it in __thread_id_map.
+ *
+ * A free slot is reserved (marked WAITING) under __thread_id_map_mutex;
+ * the new thread's id is stored into it after pthread_create() returns,
+ * without the mutex held.  Exits the process when no slot is free or
+ * when thread creation fails.  Returns the new thread's id.
+ */
+static thread_id_t create_thread(void *(*func)(void *), void *arg)
+{
+	thread_id_t tid;
+	int i;
+	int ret;
+
+	spin_lock(&__thread_id_map_mutex);
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == __THREAD_ID_MAP_EMPTY)
+			break;
+	}
+	if (i >= NR_THREADS) {
+		spin_unlock(&__thread_id_map_mutex);
+		fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
+		exit(-1);
+	}
+	__thread_id_map[i] = __THREAD_ID_MAP_WAITING;
+	spin_unlock(&__thread_id_map_mutex);
+	ret = pthread_create(&tid, NULL, func, arg);
+	if (ret != 0) {
+		/* pthread functions return the error number; perror() reads errno. */
+		errno = ret;
+		perror("create_thread:pthread_create");
+		exit(-1);
+	}
+	__thread_id_map[i] = tid;
+	return tid;
+}
+
+/*
+ * Join thread @tid, mark its __thread_id_map slot EMPTY and return the
+ * thread's exit value.  Exits the process if @tid is not in the map or
+ * if the join fails.
+ */
+static void *wait_thread(thread_id_t tid)
+{
+	int i;
+	int ret;
+	void *vp;
+
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == tid)
+			break;
+	}
+	if (i >= NR_THREADS){
+		fprintf(stderr, "wait_thread: bad tid = %d(%#x)\n",
+				(int)tid, (int)tid);
+		exit(-1);
+	}
+	ret = pthread_join(tid, &vp);
+	if (ret != 0) {
+		/* pthread functions return the error number; perror() reads errno. */
+		errno = ret;
+		perror("wait_thread:pthread_join");
+		exit(-1);
+	}
+	__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
+	return vp;
+}
+
+/*
+ * Join every thread recorded in slots 1..NR_THREADS-1 of the map,
+ * discarding the exit values.
+ */
+static void wait_all_threads(void)
+{
+	int i;
+	thread_id_t tid;
+
+	/* NOTE(review): slot 0 is skipped -- presumably it belongs to the
+	 * initial thread; confirm against the code that fills slot 0. */
+	for (i = 1; i < NR_THREADS; i++) {
+		tid = __thread_id_map[i];
+		if (tid != __THREAD_ID_MAP_EMPTY &&
+		    tid != __THREAD_ID_MAP_WAITING)
+			(void)wait_thread(tid);
+	}
+}
+
+/*
+ * Request that the caller run only on CPU @cpu.  The result of
+ * sched_setaffinity() is not checked, so a failure goes unnoticed.
+ */
+static void run_on(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	/* pid 0 designates the calling thread. */
+	sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+/*
+ * timekeeping -- very crude -- should use MONOTONIC...
+ */
+
+/*
+ * Return the current gettimeofday() time as a single count of
+ * microseconds.  Aborts the process if gettimeofday() fails.
+ */
+long long get_microseconds(void)
+{
+	struct timeval tv;
+
+	if (gettimeofday(&tv, NULL) != 0)
+		abort();
+	return ((long long)tv.tv_sec) * 1000000LL + (long long)tv.tv_usec;
+}
+
+/*
+ * Per-thread variables.
+ */
+
+/*
+ * Per-thread variable: an array of NR_THREADS single-member structs,
+ * each member aligned to CACHE_LINE_SIZE.  Note that the expansion
+ * already ends in a semicolon.
+ */
+#define DEFINE_PER_THREAD(type, name) \
+	struct { \
+		__typeof__(type) v \
+			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+	} __per_thread_##name[NR_THREADS];
+#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
+
+/* Access a given thread's instance, or the calling thread's instance. */
+#define per_thread(name, thread) __per_thread_##name[thread].v
+#define __get_thread_var(name) per_thread(name, smp_thread_id())
+
+/* Assign v to every thread's instance of the variable. */
+#define init_per_thread(name, v) \
+	do { \
+		int __i_p_t_i; \
+		for (__i_p_t_i = 0; __i_p_t_i < NR_THREADS; __i_p_t_i++) \
+			per_thread(name, __i_p_t_i) = v; \
+	} while (0)
+
+/*
+ * CPU traversal primitives.
+ */
+
+#ifndef NR_CPUS
+#define NR_CPUS 16
+#endif /* #ifndef NR_CPUS */
+
+#define for_each_possible_cpu(cpu) \
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+#define for_each_online_cpu(cpu) \
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+
+/*
+ * Per-CPU variables.
+ */
+
+#define DEFINE_PER_CPU(type, name) \
+	struct { \
+		__typeof__(type) v \
+			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+	} __per_cpu_##name[NR_CPUS]
+#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
+
+DEFINE_PER_THREAD(int, smp_processor_id);
+
+#define per_cpu(name, thread) __per_cpu_##name[thread].v
+#define __get_cpu_var(name) per_cpu(name, smp_processor_id())
+
+#define init_per_cpu(name, v) \
+	do { \
+		int __i_p_c_i; \
+		for (__i_p_c_i = 0; __i_p_c_i < NR_CPUS; __i_p_c_i++) \
+			per_cpu(name, __i_p_c_i) = v; \
+	} while (0)
+
+/*
+ * CPU state checking (crowbarred).
+ */
+
+#define idle_cpu(cpu) 0
+#define in_softirq() 1
+#define hardirq_count() 0
+#define PREEMPT_SHIFT   0
+#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define PREEMPT_BITS    8
+#define SOFTIRQ_BITS    8
+
+/*
+ * CPU hotplug.
+ */
+
+struct notifier_block {
+	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
+	struct notifier_block *next;
+	int priority;
+};
+
+#define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
+#define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
+#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
+#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
+#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
+#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
+#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
+				        * not handling interrupts, soon dead */
+#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+					* lock is dropped */
+
+/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
+ * operation in progress
+ */
+#define CPU_TASKS_FROZEN	0x0010
+
+#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)
+#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_UP_CANCELED_FROZEN	(CPU_UP_CANCELED | CPU_TASKS_FROZEN)
+#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
+#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
+
+/* Hibernation and suspend events */
+#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
+#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
+#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
+#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
+#define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
+#define PM_POST_RESTORE		0x0006 /* Restore failed */
+
+#define NOTIFY_DONE		0x0000		/* Don't care */
+#define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
+						/* Bad/Veto action */
+/*
+ * Clean way to return from the notifier and stop further calls.
+ */
+#define NOTIFY_STOP		(NOTIFY_OK|NOTIFY_STOP_MASK)
+
+/*
+ * Bug checks: BUG_ON(c) aborts when c is true, as in the Linux kernel.
+ */
+
+#define BUG_ON(c) do { if (c) abort(); } while (0)
+
+/*
+ * Initialization -- Must be called before calling any primitives.
+ */
+
+static void smp_init(void)
+{
+	int i;
+
+	spin_lock_init(&__thread_id_map_mutex);
+	__thread_id_map[0] = pthread_self();
+	for (i = 1; i < NR_THREADS; i++)
+		__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
+	init_per_thread(smp_processor_id, 0);
+	if (pthread_key_create(&thread_id_key, NULL) != 0) {
+		perror("pthread_key_create");
+		exit(-1);
+	}
+}
+
+/* Taken from the Linux kernel source tree, so GPLv2-only!!! */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+#define LIST_POISON1  ((void *) 0x00100100)
+#define LIST_POISON2  ((void *) 0x00200200)
+
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+#else
+extern void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+#endif
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+#else
+extern void list_del(struct list_head *entry);
+#endif
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * If @old was empty, it will be overwritten.
+ */
+static inline void list_replace(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old,
+					struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	__list_del(list->prev, list->next);
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	__list_del(list->prev, list->next);
+	list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+				const struct list_head *head)
+{
+	return list->next == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+
+/**
+ * list_is_singular - tests whether a list has just one entry.
+ * @head: the list to test.
+ */
+static inline int list_is_singular(const struct list_head *head)
+{
+	return !list_empty(head) && (head->next == head->prev);
+}
+
+static inline void __list_cut_position(struct list_head *list,
+		struct list_head *head, struct list_head *entry)
+{
+	struct list_head *new_first = entry->next;
+	list->next = head->next;
+	list->next->prev = list;
+	list->prev = entry;
+	entry->next = list;
+	head->next = new_first;
+	new_first->prev = head;
+}
+
+/**
+ * list_cut_position - cut a list into two
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *	and if so we won't cut the list
+ *
+ * This helper moves the initial part of @head, up to and
+ * including @entry, from @head to @list. You should
+ * pass on @entry an element you know is on @head. @list
+ * should be an empty list or a list you do not care about
+ * losing its data.
+ *
+ */
+static inline void list_cut_position(struct list_head *list,
+		struct list_head *head, struct list_head *entry)
+{
+	if (list_empty(head))
+		return;
+	if (list_is_singular(head) &&
+		(head->next != entry && head != entry))
+		return;
+	if (entry == head)
+		INIT_LIST_HEAD(list);
+	else
+		__list_cut_position(list, head, entry);
+}
+
+static inline void __list_splice(const struct list_head *list,
+				 struct list_head *prev,
+				 struct list_head *next)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
+/**
+ * list_splice - join two lists, this is designed for stacks
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(const struct list_head *list,
+				struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head, head->next);
+}
+
+/**
+ * list_splice_tail - join two lists, each list being a queue
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice_tail(struct list_head *list,
+				struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head->prev, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head, head->next);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_splice_tail_init - join two lists and reinitialise the emptied list
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Each of the lists is a queue.
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_tail_init(struct list_head *list,
+					 struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head->prev, head);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr:	the list head to take the element from.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
+        	pos = pos->next)
+
+/**
+ * __list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev	-	iterate over a list backwards
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
+        	pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+/**
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev_safe(pos, n, head) \
+	for (pos = (head)->prev, n = pos->prev; \
+	     prefetch(pos->prev), pos != (head); \
+	     pos = n, n = pos->prev)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     prefetch(pos->member.next), &pos->member != (head); 	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     prefetch(pos->member.prev), &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
+ * @pos:	the type * to use as a start point
+ * @head:	the head of the list
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
+ */
+#define list_prepare_entry(pos, head, member) \
+	((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
+	     prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue_reverse - iterate backwards from the given point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Start to iterate over list of given type backwards, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member)		\
+	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     prefetch(pos->member.prev), &pos->member != (head);	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from(pos, head, member) 			\
+	for (; prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+		n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
+		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) 			\
+	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		INIT_HLIST_NODE(n);
+	}
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	if(next->next)
+		next->next->pprev  = &next->next;
+}
+
+/*
+ * Move a list from one list head to another. Fixup the pprev
+ * reference of the first entry if it exists.
+ */
+static inline void hlist_move_list(struct hlist_head *old,
+				   struct hlist_head *new)
+{
+	new->first = old->first;
+	if (new->first)
+		new->first->pprev = &new->first;
+	old->first = NULL;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
+	     pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/**
+ * hlist_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)			 \
+	for (pos = (head)->first;					 \
+	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)		 \
+	for (pos = (pos)->next;						 \
+	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)			 \
+	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
+	for (pos = (head)->first;					 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
diff --git a/tests/api_x86.h b/tests/api_x86.h
new file mode 100644
index 0000000..f48fce9
--- /dev/null
+++ b/tests/api_x86.h
@@ -0,0 +1,1387 @@
+/* MECHANICALLY GENERATED, DO NOT EDIT!!! */
+
+#define _INCLUDE_API_H
+
+/*
+ * common.h: Common Linux kernel-isms.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; but version 2 of the License only due
+ * to code included from the Linux kernel.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ *
+ * Much code taken from the Linux kernel.  For such code, the option
+ * to redistribute under later versions of GPL might not be available.
+ */
+
+#ifndef __always_inline
+#define __always_inline inline
+#endif
+
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(char[1 - 2 * !!(e)]) - 1)
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)   __VA_ARGS__
+#  define ASM_CONST(x)          x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...) #__VA_ARGS__
+#  define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)        x##UL
+#  define ASM_CONST(x)          __ASM_CONST(x)
+#endif
+
+
+/*
+ * arch-i386.h: Expose x86 atomic instructions.  80486 and better only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, but version 2 only due to inclusion
+ * of Linux-kernel code.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ *
+ * Much code taken from the Linux kernel.  For such code, the option
+ * to redistribute under later versions of GPL might not be available.
+ */
+
+/*
+ * Machine parameters.
+ */
+
+#define CACHE_LINE_SIZE 64
+#define ____cacheline_internodealigned_in_smp \
+	__attribute__((__aligned__(1 << 6)))
+
+#define LOCK_PREFIX "lock ; "
+
+/*
+ * Atomic data structure, initialization, and access.
+ */
+
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i)  { (i) }
+
+#define atomic_read(v)		((v)->counter)
+#define atomic_set(v, i)	(((v)->counter) = (i))
+
+/*
+ * Atomic operations.
+ */
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically adds @i to @v.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+	__asm__ __volatile__(
+		LOCK_PREFIX "addl %1,%0"
+		:"+m" (v->counter)
+		:"ir" (i));
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+	__asm__ __volatile__(
+		LOCK_PREFIX "subl %1,%0"
+		:"+m" (v->counter)
+		:"ir" (i));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		LOCK_PREFIX "subl %2,%0; sete %1"
+		:"+m" (v->counter), "=qm" (c)
+		:"ir" (i) : "memory");
+	return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+	__asm__ __volatile__(
+		LOCK_PREFIX "incl %0"
+		:"+m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1.
+ */ 
+static __inline__ void atomic_dec(atomic_t *v)
+{
+	__asm__ __volatile__(
+		LOCK_PREFIX "decl %0"
+		:"+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */ 
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		LOCK_PREFIX "decl %0; sete %1"
+		:"+m" (v->counter), "=qm" (c)
+		: : "memory");
+	return c != 0;
+}
+
+/**
+ * atomic_inc_and_test - increment and test 
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */ 
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		LOCK_PREFIX "incl %0; sete %1"
+		:"+m" (v->counter), "=qm" (c)
+		: : "memory");
+	return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ * 
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */ 
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+	unsigned char c;
+
+	__asm__ __volatile__(
+		LOCK_PREFIX "addl %2,%0; sets %1"
+		:"+m" (v->counter), "=qm" (c)
+		:"ir" (i) : "memory");
+	return c;
+}
+
+/**
+ * atomic_add_return - add and return
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns the new value of @v.  xadd leaves
+ * the old value in the register; "+m"/"memory" tell gcc *v is written.
+ */
+static __inline__ int atomic_add_return(int i, atomic_t *v)
+{
+	int __i = i;	/* addend, saved because xadd overwrites i */
+
+	__asm__ __volatile__(
+		LOCK_PREFIX "xaddl %0, %1;"
+		:"+r" (i), "+m" (v->counter)
+		: : "memory");
+	return i + __i;
+}
+
+static __inline__ int atomic_sub_return(int i, atomic_t *v)
+{
+	return atomic_add_return(-i,v);
+}
+
+/*
+ * cmpxchg - compare-and-exchange on the word at @ptr.
+ *
+ * Runs "lock; cmpxchgl" with the accumulator preloaded with @oldval (the
+ * "0" constraint ties it to the "=a" output) and @newval as the source
+ * register, then returns what the instruction left in the accumulator.
+ * atomic_add_unless() compares that return value with the value it passed
+ * as @oldval to decide whether the exchange took place.
+ *
+ * NOTE(review): the instruction carries the 32-bit "l" suffix while the
+ * parameters are declared long, and retval is unsigned long while the
+ * function returns unsigned int -- confirm this is intended on 64-bit
+ * builds.
+ * NOTE(review): the asm is neither volatile nor does it clobber "memory",
+ * unlike atomic_dec_and_test() and friends -- confirm callers do not rely
+ * on it as a compiler barrier.
+ */
+static inline unsigned int
+cmpxchg(volatile long *ptr, long oldval, long newval)
+{
+	unsigned long retval;
+
+	asm("# cmpxchg\n"
+	    "lock; cmpxchgl %4,(%2)\n"
+	    "# end atomic_cmpxchg4"
+	    : "=a" (retval), "=m" (*ptr)
+	    : "r" (ptr), "0" (oldval), "r" (newval), "m" (*ptr)
+	    : "cc");
+	return (retval);
+}
+
+/*
+ * atomic_t wrappers; both operate on the ->counter member of @v.
+ * atomic_cmpxchg() casts cmpxchg()'s return value to int.  atomic_xchg()
+ * forwards to xchg(), which is not defined in the visible part of this
+ * file -- TODO confirm where it comes from.
+ */
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ *
+ * Implemented as a retry loop: read the counter, give up if it equals @u,
+ * otherwise try atomic_cmpxchg() from the value read to that value plus @a
+ * and start over from the value atomic_cmpxchg() returned when it differs.
+ * The macro declares locals named c and old and evaluates its arguments
+ * more than once, so arguments should be free of side effects and should
+ * not themselves use those names.
+ *
+ * atomic_inc_not_zero(v) is the special case "add 1 unless the value is 0".
+ */
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	for (;;) {						\
+		if (unlikely(c == (u)))				\
+			break;					\
+		old = atomic_cmpxchg((v), c, c + (a));		\
+		if (likely(old == c))				\
+			break;					\
+		c = old;					\
+	}							\
+	c != (u);						\
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+/* Add or subtract 1 and return the result of atomic_add_return()/atomic_sub_return(). */
+#define atomic_inc_return(v)  (atomic_add_return(1,v))
+#define atomic_dec_return(v)  (atomic_sub_return(1,v))
+
+/* These are x86-specific, used by some header files */
+
+/*
+ * atomic_clear_mask - AND the word at @addr with the complement of @mask.
+ * @addr is parenthesized so that an expression argument such as "p + 1"
+ * is dereferenced as a whole, exactly as in atomic_set_mask().
+ */
+#define atomic_clear_mask(mask, addr) \
+__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \
+: : "r" (~(mask)),"m" (*(addr)) : "memory")
+
+/* atomic_set_mask - OR @mask into the word at @addr. */
+#define atomic_set_mask(mask, addr) \
+__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
+: : "r" (mask),"m" (*(addr)) : "memory")
+
+/*
+ * Atomic operations are already serializing on x86, so the barriers that
+ * bracket atomic_inc()/atomic_dec() only need to expand to barrier().
+ * barrier() is not defined in the visible part of this file; presumably it
+ * is a compiler-only barrier -- TODO confirm.
+ */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+/*
+ * api_pthreads.h: API mapping to pthreads environment.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.  However, please note that much
+ * of the code in this file derives from the Linux kernel, and that such
+ * code may not be available except under GPLv2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2006 Paul E. McKenney, IBM.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#define __USE_GNU
+#include <pthread.h>
+#include <sched.h>
+#include <sys/param.h>
+/* #include "atomic.h" */
+
+/*
+ * Compiler magic.
+ *
+ * offsetof(TYPE, MEMBER): byte offset of MEMBER within TYPE, obtained by
+ * taking the member's address in a TYPE imagined at address 0.
+ * container_of(ptr, type, member): given a pointer to the member field of
+ * some object, subtract that offset to recover a pointer to the object.
+ * The typeof()-declared temporary lets the compiler complain when ptr is
+ * not a pointer to the member's type.
+ */
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Default machine parameters.
+ *
+ * CACHE_LINE_SIZE is the alignment given to every per-thread and per-CPU
+ * slot below; define it before this point to override the default of 128.
+ */
+
+#ifndef CACHE_LINE_SIZE
+#define CACHE_LINE_SIZE 128
+#endif /* #ifndef CACHE_LINE_SIZE */
+
+/*
+ * Exclusive locking primitives.
+ *
+ * The kernel-style spinlock names are mapped onto pthread mutexes:
+ * spinlock_t is a pthread_mutex_t and both initializer macros expand to
+ * PTHREAD_MUTEX_INITIALIZER.
+ */
+
+typedef pthread_mutex_t spinlock_t;
+
+/* Defines and statically initializes a lock; the expansion already ends in ';'. */
+#define DEFINE_SPINLOCK(lock) spinlock_t lock = PTHREAD_MUTEX_INITIALIZER;
+/* Static initializer; the lockp argument is accepted but not used. */
+#define __SPIN_LOCK_UNLOCKED(lockp) PTHREAD_MUTEX_INITIALIZER
+
+/*
+ * spin_lock_init - initialize @sp with default mutex attributes.
+ *
+ * On failure prints a diagnostic on stderr and calls exit(-1).
+ */
+static void spin_lock_init(spinlock_t *sp)
+{
+	int ret = pthread_mutex_init(sp, NULL);
+
+	if (ret != 0) {
+		/* pthread functions return the error number; they do not set errno. */
+		errno = ret;
+		perror("spin_lock_init:pthread_mutex_init");
+		exit(-1);
+	}
+}
+
+/*
+ * spin_lock - acquire @sp, blocking until it is available.
+ *
+ * On failure prints a diagnostic on stderr and calls exit(-1).
+ */
+static void spin_lock(spinlock_t *sp)
+{
+	int ret = pthread_mutex_lock(sp);
+
+	if (ret != 0) {
+		/* pthread functions return the error number; they do not set errno. */
+		errno = ret;
+		perror("spin_lock:pthread_mutex_lock");
+		exit(-1);
+	}
+}
+
+/*
+ * spin_unlock - release @sp.
+ *
+ * On failure prints a diagnostic on stderr and calls exit(-1).
+ */
+static void spin_unlock(spinlock_t *sp)
+{
+	int ret = pthread_mutex_unlock(sp);
+
+	if (ret != 0) {
+		/* pthread functions return the error number; they do not set errno. */
+		errno = ret;
+		perror("spin_unlock:pthread_mutex_unlock");
+		exit(-1);
+	}
+}
+
+/*
+ * Stand-ins for the interrupt-saving lock variants: the flags argument is
+ * merely set to 1 on lock and 0 on unlock around plain spin_lock() /
+ * spin_unlock().
+ */
+#define spin_lock_irqsave(l, f) do { f = 1; spin_lock(l); } while (0)
+#define spin_unlock_irqrestore(l, f) do { f = 0; spin_unlock(l); } while (0)
+
+/*
+ * Thread creation/destruction primitives.
+ */
+
+typedef pthread_t thread_id_t;
+
+/* Capacity of __thread_id_map and of every per-thread array. */
+#define NR_THREADS 128
+
+/*
+ * Slot states stored in __thread_id_map besides a real thread id:
+ * EMPTY marks a free slot, WAITING a slot reserved by create_thread()
+ * until pthread_create() has handed back the new id.
+ * NOTE(review): these small integers are compared with pthread_t values,
+ * which assumes pthread_t is an arithmetic type -- confirm for the target
+ * platforms.
+ */
+#define __THREAD_ID_MAP_EMPTY 0
+#define __THREAD_ID_MAP_WAITING 1
+thread_id_t __thread_id_map[NR_THREADS];
+/* Taken by create_thread() while it reserves a slot. */
+spinlock_t __thread_id_map_mutex;
+
+/* Iterate t over every slot index, in use or not. */
+#define for_each_thread(t) \
+	for (t = 0; t < NR_THREADS; t++)
+
+/*
+ * Iterate t over the slots that hold a real thread id.  The expansion ends
+ * in an unbraced "if", so the statement following the macro becomes the
+ * body of that "if".
+ */
+#define for_each_running_thread(t) \
+	for (t = 0; t < NR_THREADS; t++) \
+		if ((__thread_id_map[t] != __THREAD_ID_MAP_EMPTY) && \
+		    (__thread_id_map[t] != __THREAD_ID_MAP_WAITING))
+
+/* Thread-specific key that caches (slot index + 1); created in smp_init(). */
+pthread_key_t thread_id_key;
+
+/*
+ * __smp_thread_id - slow path of smp_thread_id().
+ *
+ * Looks the calling thread up in __thread_id_map.  A hit in the first,
+ * lockless scan also caches (index + 1) under thread_id_key; a miss is
+ * retried with __thread_id_map_mutex held.  If the thread is not in the
+ * map at all, a diagnostic is printed and the process exits.
+ *
+ * Returns the thread's slot index.
+ */
+static int __smp_thread_id(void)
+{
+	int i;
+	thread_id_t tid = pthread_self();
+
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == tid) {
+			long v = i + 1;  /* must be non-NULL. */
+			int ret = pthread_setspecific(thread_id_key, (void *)v);
+
+			if (ret != 0) {
+				/* The error is returned, not stored in errno. */
+				errno = ret;
+				perror("pthread_setspecific");
+				exit(-1);
+			}
+			return i;
+		}
+	}
+	spin_lock(&__thread_id_map_mutex);
+	for (i = 0; i < NR_THREADS; i++) {
+		/*
+		 * Only a matching slot may drop the lock and return; every
+		 * other slot must keep the scan going with the lock held.
+		 */
+		if (__thread_id_map[i] == tid) {
+			spin_unlock(&__thread_id_map_mutex);
+			return i;
+		}
+	}
+	spin_unlock(&__thread_id_map_mutex);
+	fprintf(stderr, "smp_thread_id: Rogue thread, id: %d(%#x)\n",
+			(int)tid, (int)tid);
+	exit(-1);
+}
+
+/*
+ * smp_thread_id - small integer index of the calling thread.
+ *
+ * The thread-specific value under thread_id_key holds (index + 1) so that
+ * an unset slot, NULL, can be told apart from index 0.  When nothing is
+ * cached yet the lookup is delegated to __smp_thread_id().
+ */
+static int smp_thread_id(void)
+{
+	void *cached = pthread_getspecific(thread_id_key);
+
+	if (cached != NULL)
+		return (long)cached - 1;
+	return __smp_thread_id();
+}
+
+/*
+ * create_thread - start a thread running func(arg) and record it.
+ *
+ * Reserves the first empty __thread_id_map slot under
+ * __thread_id_map_mutex, creates the thread with default attributes and
+ * then stores its id in the reserved slot.  Prints a diagnostic and exits
+ * the process when no slot is free or when pthread_create() fails.
+ *
+ * Returns the id of the new thread.
+ *
+ * NOTE(review): the id is stored after pthread_create() returns and without
+ * the mutex, so the new thread may already be running while its slot still
+ * reads WAITING -- confirm callers tolerate that.
+ */
+static thread_id_t create_thread(void *(*func)(void *), void *arg)
+{
+	thread_id_t tid;
+	int i;
+	int ret;
+
+	spin_lock(&__thread_id_map_mutex);
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == __THREAD_ID_MAP_EMPTY)
+			break;
+	}
+	if (i >= NR_THREADS) {
+		spin_unlock(&__thread_id_map_mutex);
+		fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
+		exit(-1);
+	}
+	__thread_id_map[i] = __THREAD_ID_MAP_WAITING;
+	spin_unlock(&__thread_id_map_mutex);
+	ret = pthread_create(&tid, NULL, func, arg);
+	if (ret != 0) {
+		/* pthread_create() returns the error number; errno is not set. */
+		errno = ret;
+		perror("create_thread:pthread_create");
+		exit(-1);
+	}
+	__thread_id_map[i] = tid;
+	return tid;
+}
+
+/*
+ * wait_thread - join thread @tid and free its __thread_id_map slot.
+ *
+ * Prints a diagnostic and exits the process when @tid is not in the map or
+ * when pthread_join() fails.
+ *
+ * Returns the value the joined thread exited with.
+ */
+static void *wait_thread(thread_id_t tid)
+{
+	int i;
+	int ret;
+	void *vp;
+
+	for (i = 0; i < NR_THREADS; i++) {
+		if (__thread_id_map[i] == tid)
+			break;
+	}
+	if (i >= NR_THREADS){
+		fprintf(stderr, "wait_thread: bad tid = %d(%#x)\n",
+				(int)tid, (int)tid);
+		exit(-1);
+	}
+	ret = pthread_join(tid, &vp);
+	if (ret != 0) {
+		/* pthread_join() returns the error number; errno is not set. */
+		errno = ret;
+		perror("wait_thread:pthread_join");
+		exit(-1);
+	}
+	__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
+	return vp;
+}
+
+/*
+ * wait_all_threads - join every thread recorded in slots 1..NR_THREADS-1.
+ *
+ * Slot 0 is not visited; smp_init() stores the initializing thread there.
+ * Slots that are empty or still reserved are skipped.
+ */
+static void wait_all_threads(void)
+{
+	int slot = 1;
+
+	while (slot < NR_THREADS) {
+		thread_id_t tid = __thread_id_map[slot++];
+
+		if (tid == __THREAD_ID_MAP_EMPTY ||
+		    tid == __THREAD_ID_MAP_WAITING)
+			continue;
+		(void)wait_thread(tid);
+	}
+}
+
+/*
+ * run_on - ask the scheduler to restrict the caller to CPU @cpu.
+ *
+ * Builds a CPU set containing only @cpu and passes it to
+ * sched_setaffinity() with pid 0.  The call's return value is not checked,
+ * so failures go unreported.
+ */
+static void run_on(int cpu)
+{
+	cpu_set_t only_cpu;
+
+	CPU_ZERO(&only_cpu);
+	CPU_SET(cpu, &only_cpu);
+	(void)sched_setaffinity(0, sizeof(only_cpu), &only_cpu);
+}
+
+/*
+ * Timekeeping: current gettimeofday() time expressed in microseconds.
+ * Very crude -- a monotonic clock would be the better source.
+ * Aborts the process if gettimeofday() fails.
+ */
+
+long long get_microseconds(void)
+{
+	struct timeval now;
+	long long usecs;
+
+	if (gettimeofday(&now, NULL) != 0)
+		abort();
+	usecs = (long long)now.tv_sec;
+	usecs *= 1000000LL;
+	usecs += (long long)now.tv_usec;
+	return usecs;
+}
+
+/*
+ * Per-thread variables.
+ *
+ * DEFINE_PER_THREAD(type, name) creates an array of NR_THREADS anonymous
+ * structs, each holding one value of @type aligned to CACHE_LINE_SIZE.
+ * Its expansion already ends in ';'.
+ * per_thread(name, thread) names the value in slot @thread, and
+ * __get_thread_var(name) the slot selected by smp_thread_id().
+ * init_per_thread(name, v) assigns @v to all NR_THREADS slots.
+ *
+ * NOTE(review): DECLARE_PER_THREAD() re-expands the anonymous struct, so
+ * the extern declaration and the definition spell two separate struct
+ * types -- confirm both can be used in one translation unit.
+ */
+
+#define DEFINE_PER_THREAD(type, name) \
+	struct { \
+		__typeof__(type) v \
+			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+	} __per_thread_##name[NR_THREADS];
+#define DECLARE_PER_THREAD(type, name) extern DEFINE_PER_THREAD(type, name)
+
+#define per_thread(name, thread) __per_thread_##name[thread].v
+#define __get_thread_var(name) per_thread(name, smp_thread_id())
+
+#define init_per_thread(name, v) \
+	do { \
+		int __i_p_t_i; \
+		for (__i_p_t_i = 0; __i_p_t_i < NR_THREADS; __i_p_t_i++) \
+			per_thread(name, __i_p_t_i) = v; \
+	} while (0)
+
+/*
+ * CPU traversal primitives.
+ *
+ * NR_CPUS defaults to 16 unless defined beforehand.  Both iterators walk
+ * cpu from 0 to NR_CPUS - 1; for_each_online_cpu() applies no additional
+ * filtering.
+ */
+
+#ifndef NR_CPUS
+#define NR_CPUS 16
+#endif /* #ifndef NR_CPUS */
+
+#define for_each_possible_cpu(cpu) \
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+#define for_each_online_cpu(cpu) \
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+
+/*
+ * Per-CPU variables.
+ *
+ * DEFINE_PER_CPU(type, name) creates an array of NR_CPUS anonymous structs,
+ * each holding one value of @type aligned to CACHE_LINE_SIZE.  Unlike
+ * DEFINE_PER_THREAD(), its expansion does not end in ';'.
+ * per_cpu(name, thread) names the value in the slot given by its second
+ * argument, and init_per_cpu(name, v) assigns @v to all NR_CPUS slots.
+ *
+ * NOTE(review): __get_cpu_var() calls smp_processor_id() as a function,
+ * but the only smp_processor_id visible here is the per-thread variable
+ * defined below -- confirm where the function comes from.
+ */
+
+#define DEFINE_PER_CPU(type, name) \
+	struct { \
+		__typeof__(type) v \
+			__attribute__((__aligned__(CACHE_LINE_SIZE))); \
+	} __per_cpu_##name[NR_CPUS]
+#define DECLARE_PER_CPU(type, name) extern DEFINE_PER_CPU(type, name)
+
+/* Per-thread int; every slot is set to 0 by smp_init(). */
+DEFINE_PER_THREAD(int, smp_processor_id);
+
+#define per_cpu(name, thread) __per_cpu_##name[thread].v
+#define __get_cpu_var(name) per_cpu(name, smp_processor_id())
+
+#define init_per_cpu(name, v) \
+	do { \
+		int __i_p_c_i; \
+		for (__i_p_c_i = 0; __i_p_c_i < NR_CPUS; __i_p_c_i++) \
+			per_cpu(name, __i_p_c_i) = v; \
+	} while (0)
+
+/*
+ * CPU state checking (crowbarred).
+ *
+ * Fixed stand-ins: idle_cpu() is always 0, in_softirq() always 1 and
+ * hardirq_count() always 0.  SOFTIRQ_SHIFT and HARDIRQ_SHIFT refer to
+ * PREEMPT_BITS and SOFTIRQ_BITS, which are only defined a few lines
+ * further down; that works because macros are expanded where they are
+ * used, not where they are defined.
+ */
+
+#define idle_cpu(cpu) 0
+#define in_softirq() 1
+#define hardirq_count() 0
+#define PREEMPT_SHIFT   0
+#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define PREEMPT_BITS    8
+#define SOFTIRQ_BITS    8
+
+/*
+ * CPU hotplug.
+ *
+ * struct notifier_block describes one notifier: a callback, a pointer to
+ * the next block and a priority.
+ */
+
+struct notifier_block {
+	/* presumably called as (block, event code, event data) -- TODO confirm */
+	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
+	struct notifier_block *next;
+	int priority;
+};
+
+/* CPU hotplug event codes. */
+#define CPU_ONLINE		0x0002 /* CPU (unsigned)v is up */
+#define CPU_UP_PREPARE		0x0003 /* CPU (unsigned)v coming up */
+#define CPU_UP_CANCELED		0x0004 /* CPU (unsigned)v NOT coming up */
+#define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
+#define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
+#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
+#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
+				        * not handling interrupts, soon dead */
+#define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+					* lock is dropped */
+
+/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
+ * operation in progress
+ */
+#define CPU_TASKS_FROZEN	0x0010
+
+/* Each *_FROZEN code is the plain event code with CPU_TASKS_FROZEN OR-ed in. */
+#define CPU_ONLINE_FROZEN	(CPU_ONLINE | CPU_TASKS_FROZEN)
+#define CPU_UP_PREPARE_FROZEN	(CPU_UP_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_UP_CANCELED_FROZEN	(CPU_UP_CANCELED | CPU_TASKS_FROZEN)
+#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
+#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
+
+/* Hibernation and suspend events */
+#define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */
+#define PM_POST_HIBERNATION	0x0002 /* Hibernation finished */
+#define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
+#define PM_POST_SUSPEND		0x0004 /* Suspend finished */
+#define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
+#define PM_POST_RESTORE		0x0006 /* Restore failed */
+
+/* Notifier callback return values. */
+#define NOTIFY_DONE		0x0000		/* Don't care */
+#define NOTIFY_OK		0x0001		/* Suits me */
+#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
+#define NOTIFY_BAD		(NOTIFY_STOP_MASK|0x0002)
+						/* Bad/Veto action */
+/*
+ * Clean way to return from the notifier and stop further calls.
+ */
+#define NOTIFY_STOP		(NOTIFY_OK|NOTIFY_STOP_MASK)
+
+/*
+ * Bug checks.
+ *
+ * As written, BUG_ON(c) calls abort() when c evaluates to false.
+ * NOTE(review): the Linux kernel macro of the same name fires when its
+ * condition is true -- confirm which sense the users of this header expect
+ * before relying on it.
+ */
+
+#define BUG_ON(c) do { if (!(c)) abort(); } while (0)
+
+/*
+ * Initialization -- Must be called before calling any primitives.
+ *
+ * Sets up __thread_id_map_mutex, records the calling thread in slot 0 of
+ * __thread_id_map and marks all other slots empty, zeroes every
+ * smp_processor_id per-thread slot and creates thread_id_key.  Prints a
+ * diagnostic and exits the process if the key cannot be created.
+ */
+
+static void smp_init(void)
+{
+	int i;
+	int ret;
+
+	spin_lock_init(&__thread_id_map_mutex);
+	__thread_id_map[0] = pthread_self();
+	for (i = 1; i < NR_THREADS; i++)
+		__thread_id_map[i] = __THREAD_ID_MAP_EMPTY;
+	init_per_thread(smp_processor_id, 0);
+	ret = pthread_key_create(&thread_id_key, NULL);
+	if (ret != 0) {
+		/* pthread_key_create() returns the error number; errno is not set. */
+		errno = ret;
+		perror("pthread_key_create");
+		exit(-1);
+	}
+}
+
+/* Taken from the Linux kernel source tree, so GPLv2-only!!! */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Values that list_del() and hlist_del() store in the link fields of a
+ * removed entry, so that a stale entry no longer points into a list.
+ */
+#define LIST_POISON1  ((void *) 0x00100100)
+#define LIST_POISON2  ((void *) 0x00200200)
+
+/*
+ * offsetof(): byte offset of MEMBER within TYPE.
+ * container_of(): map a pointer to a member back to the object that
+ * contains it by subtracting that offset.
+ */
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ *
+ * The list is circular: an empty list is a head whose next and prev both
+ * point back at the head itself, which is the state produced by
+ * LIST_HEAD_INIT(), LIST_HEAD() and INIT_LIST_HEAD().
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+/* Static initializer for an empty list named @name. */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* Define an empty list head called @name. */
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+/* Run-time counterpart of LIST_HEAD_INIT(): make @list an empty list. */
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * When CONFIG_DEBUG_LIST is defined only a declaration is emitted and the
+ * definition has to come from elsewhere.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	/* Four links change: both of new's, next's back link, prev's forward link. */
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+#else
+extern void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+#endif
+
+/**
+ * list_add - add a new entry right after a list head
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * The entry ends up between @head and the entry that used to follow it,
+ * which gives stack (LIFO) behaviour.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *after = head->next;
+
+	__list_add(new, head, after);
+}
+
+
+/**
+ * list_add_tail - add a new entry right before a list head
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * The entry ends up between the entry that used to precede @head and @head
+ * itself, which gives queue (FIFO) behaviour.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *before = head->prev;
+
+	__list_add(new, before, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * The entry being removed is not passed in and is therefore not modified.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ *
+ * When CONFIG_DEBUG_LIST is defined only a declaration is emitted and the
+ * definition has to come from elsewhere.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	/* Poison the links so the removed entry no longer points into the list. */
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+#else
+extern void list_del(struct list_head *entry);
+#endif
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * If @old was empty, it will be overwritten.
+ *
+ * @old's own next/prev pointers are left as they were; use
+ * list_replace_init() to reset them as well.
+ */
+static inline void list_replace(struct list_head *old,
+				struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+/**
+ * list_replace_init - replace old entry by new one and reinitialize the old one
+ * @old: the element to be replaced
+ * @new: the new element to insert
+ *
+ * Same as list_replace(), after which @old is turned into an empty list.
+ */
+static inline void list_replace_init(struct list_head *old,
+					struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - unlink an entry and leave it as an empty list
+ * @entry: the element to delete from the list.
+ *
+ * Unlike list_del(), the entry's links are reset to point at the entry
+ * itself rather than being poisoned.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - unlink an entry and re-insert it right after another head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	__list_del(before, after);
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - unlink an entry and re-insert it right before another head
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	__list_del(before, after);
+	list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ *
+ * An entry is last when the head is what follows it.
+ */
+static inline int list_is_last(const struct list_head *list,
+				const struct list_head *head)
+{
+	const struct list_head *after = list->next;
+
+	return after == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ *
+ * A list is empty when its head's next pointer refers back to the head.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	const struct list_head *first = head->next;
+
+	return first == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+	/* head->next is read once and that same value is checked against both links. */
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+
+/**
+ * list_is_singular - tests whether a list has just one entry.
+ * @head: the list to test.
+ *
+ * True when the list is not empty and its first and last entry coincide.
+ */
+static inline int list_is_singular(const struct list_head *head)
+{
+	if (list_empty(head))
+		return 0;
+	return head->next == head->prev;
+}
+
+/*
+ * Worker for list_cut_position(): moves the entries from the first entry
+ * of @head up to and including @entry onto @list, and makes the entry
+ * that followed @entry the new first entry of @head.  Performs no checks;
+ * list_cut_position() filters out the cases this must not be called for.
+ */
+static inline void __list_cut_position(struct list_head *list,
+		struct list_head *head, struct list_head *entry)
+{
+	/* Remember what follows @entry before its next pointer is redirected. */
+	struct list_head *new_first = entry->next;
+	list->next = head->next;
+	list->next->prev = list;
+	list->prev = entry;
+	entry->next = list;
+	head->next = new_first;
+	new_first->prev = head;
+}
+
+/**
+ * list_cut_position - cut a list into two
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *	and if so we won't cut the list
+ *
+ * Moves the leading part of @head, up to and including @entry, from
+ * @head to @list.  @entry should be an element known to be on @head, and
+ * @list should be empty or hold nothing worth keeping, because its
+ * previous contents are lost.
+ *
+ * Nothing happens when @head is empty, or when @head has a single entry
+ * and @entry is neither that entry nor @head.  When @entry is @head
+ * itself, @list is simply reset to an empty list.
+ */
+static inline void list_cut_position(struct list_head *list,
+		struct list_head *head, struct list_head *entry)
+{
+	if (list_empty(head))
+		return;
+	if (list_is_singular(head) && head->next != entry && head != entry)
+		return;
+	if (entry != head) {
+		__list_cut_position(list, head, entry);
+		return;
+	}
+	INIT_LIST_HEAD(list);
+}
+
+/*
+ * Worker for the list_splice*() family: links the entries of @list, from
+ * its first to its last, between @prev and @next.  @list itself is not
+ * modified, and callers check that it is not empty before calling.
+ */
+static inline void __list_splice(const struct list_head *list,
+				 struct list_head *prev,
+				 struct list_head *next)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
+/**
+ * list_splice - join two lists, this is designed for stacks
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are inserted right after @head.  An empty @list
+ * is a no-op.
+ */
+static inline void list_splice(const struct list_head *list,
+				struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+	__list_splice(list, head, head->next);
+}
+
+/**
+ * list_splice_tail - join two lists, each list being a queue
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are inserted right before @head.  An empty @list
+ * is a no-op.
+ */
+static inline void list_splice_tail(struct list_head *list,
+				struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+	__list_splice(list, head->prev, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The entries of @list are inserted right after @head and @list is then
+ * reset to an empty list.  An empty @list is left untouched.
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+	__list_splice(list, head, head->next);
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_splice_tail_init - join two lists and reinitialise the emptied list
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Each of the lists is a queue: the entries of @list are inserted right
+ * before @head and @list is then reset to an empty list.  An empty @list
+ * is left untouched.
+ */
+static inline void list_splice_tail_init(struct list_head *list,
+					 struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+	__list_splice(list, head->prev, head);
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Plain alias for container_of().
+ */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr:	the list head to take the element from.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ * (On an empty list (ptr)->next is the head itself, so the result would
+ * not refer to a real element.)
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/*
+ * Node-level iterators.  Each expands to a bare "for (...)" header, so the
+ * statement that follows the macro is the loop body.  The variants that
+ * call prefetch() need a prefetch() definition, which is not in the
+ * visible part of this file -- TODO confirm where it is provided.
+ */
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
+        	pos = pos->next)
+
+/**
+ * __list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev	-	iterate over a list backwards
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @head:	the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+	for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \
+        	pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ *
+ * @n always holds the entry after @pos, read before the loop body runs.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+/**
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop cursor.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ *
+ * @n always holds the entry before @pos, read before the loop body runs.
+ */
+#define list_for_each_prev_safe(pos, n, head) \
+	for (pos = (head)->prev, n = pos->prev; \
+	     prefetch(pos->prev), pos != (head); \
+	     pos = n, n = pos->prev)
+
+/**
+ * list_for_each_entry	-	iterate over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * The loop ends when @pos's embedded list_head is @head itself.
+ * Calls prefetch(), which is not defined in the visible part of this
+ * file -- TODO confirm where it is provided.
+ */
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_entry((head)->next, typeof(*pos), member);	\
+	     prefetch(pos->member.next), &pos->member != (head); 	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Same as list_for_each_entry() but follows the prev pointers.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     prefetch(pos->member.prev), &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
+ * @pos:	the type * to use as a start point
+ * @head:	the head of the list
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
+ * Yields @pos when it is non-NULL; otherwise yields the pointer obtained
+ * by treating @head as the @member field of an entry.
+ */
+#define list_prepare_entry(pos, head, member) \
+	((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
+	     prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue_reverse - iterate backwards from the given point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Start to iterate over list of given type backwards, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member)		\
+	for (pos = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     prefetch(pos->member.prev), &pos->member != (head);	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ * The current @pos is itself the first entry visited.
+ */
+#define list_for_each_entry_from(pos, head, member) 			\
+	for (; prefetch(pos->member.next), &pos->member != (head);	\
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * @n always holds the entry after @pos, computed before the loop body runs.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+		n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue - continue list iteration safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) 		\
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), 		\
+		n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from - iterate over list from current point safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) 			\
+	for (n = list_entry(pos->member.next, typeof(*pos), member);		\
+	     &pos->member != (head);						\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ *
+ * A node's pprev points at whichever pointer currently refers to the
+ * node: the head's first field for the first node, or the previous node's
+ * next field otherwise.
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+/* An empty hlist is a head whose first pointer is NULL. */
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+/* Mark a node as not being on any list: both links NULL. */
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+/* hlist_unhashed - true when node @h is not on a list, i.e. its pprev is NULL. */
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return h->pprev == NULL;
+}
+
+/* hlist_empty - true when list @h has no first node. */
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return h->first == NULL;
+}
+
+/*
+ * Unlink @n: the pointer that referred to @n (found through n->pprev) is
+ * made to refer to @n's successor, and the successor, if any, takes over
+ * @n's pprev.  @n's own fields are left unchanged.
+ */
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+/*
+ * hlist_del - unlink @n and poison its links.
+ * Afterwards hlist_unhashed(n) is false, because pprev holds LIST_POISON2
+ * rather than NULL.
+ */
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+/*
+ * hlist_del_init - unlink @n if it is on a list and reset its links.
+ * A node that is already unhashed is left alone.
+ */
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (hlist_unhashed(n))
+		return;
+	__hlist_del(n);
+	INIT_HLIST_NODE(n);
+}
+
+/* hlist_add_head - insert @n as the new first node of list @h. */
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;	/* old first is now reached through n */
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/*
+ * hlist_add_before - insert @n immediately before @next.
+ * next must be != NULL
+ */
+static inline void hlist_add_before(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	/* Redirect whatever pointed at @next so that it points at @n. */
+	*(n->pprev) = n;
+}
+
+/*
+ * hlist_add_after - insert @next immediately after @n.
+ * Note the argument roles: @n is the node already on the list and @next
+ * is the node being added.
+ */
+static inline void hlist_add_after(struct hlist_node *n,
+					struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	/* The node that used to follow @n is now reached through @next. */
+	if(next->next)
+		next->next->pprev  = &next->next;
+}
+
+/*
+ * Move a list from one list head to another. Fixup the pprev
+ * reference of the first entry if it exists.
+ *
+ * @old is left empty; whatever @new referred to before is overwritten.
+ */
+static inline void hlist_move_list(struct hlist_head *old,
+				   struct hlist_head *new)
+{
+	new->first = old->first;
+	if (new->first)
+		new->first->pprev = &new->first;
+	old->first = NULL;
+}
+
+/* hlist_entry - get the struct that embeds this hlist_node; alias for container_of(). */
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+/*
+ * hlist_for_each - iterate @pos over the nodes of @head until NULL.
+ * Calls prefetch(), which is not defined in the visible part of this
+ * file -- TODO confirm where it is provided.
+ */
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
+	     pos = pos->next)
+
+/*
+ * hlist_for_each_safe - like hlist_for_each(), but the successor is saved
+ * in @n before the loop body runs.
+ */
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+/*
+ * Entry-level hlist iterators.  In each of them @pos walks the hlist_node
+ * links and @tpos is set to the enclosing struct of @pos inside the loop
+ * condition, once @pos is known to be non-NULL.
+ */
+
+/**
+ * hlist_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)			 \
+	for (pos = (head)->first;					 \
+	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)		 \
+	for (pos = (pos)->next;						 \
+	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)			 \
+	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @n:		another &struct hlist_node to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
+	for (pos = (head)->first;					 \
+	     pos && ({ n = pos->next; 1; }) && 				 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = n)
+
+#endif
diff --git a/tests/rcutorture.h b/tests/rcutorture.h
new file mode 100644
index 0000000..00b0b31
--- /dev/null
+++ b/tests/rcutorture.h
@@ -0,0 +1,417 @@
+/*
+ * rcutorture.h: simple user-level performance/stress test of RCU.
+ *
+ * Usage:
+ * 	./rcu <nreaders> rperf [ <cpustride> ]
+ * 		Run a read-side performance test with the specified
+ * 		number of readers spaced by <cpustride>.
+ * 		Thus "./rcu 16 rperf 2" would run 16 readers on even-numbered
+ * 		CPUs from 0 to 30.
+ * 	./rcu <nupdaters> uperf [ <cpustride> ]
+ * 		Run an update-side performance test with the specified
+ * 		number of updaters and specified CPU spacing.
+ * 	./rcu <nreaders> perf [ <cpustride> ]
+ * 		Run a combined read/update performance test with the specified
+ * 		number of readers and one updater and specified CPU spacing.
+ * 		The readers run on the low-numbered CPUs and the updater
+ * 		on the highest-numbered CPU.
+ *
+ * The above tests produce output as follows:
+ *
+ * n_reads: 46008000  n_updates: 146026  nreaders: 2  nupdaters: 1 duration: 1
+ * ns/read: 43.4707  ns/update: 6848.1
+ *
+ * The first line lists the total number of RCU reads and updates executed
+ * during the test, the number of reader threads, the number of updater
+ * threads, and the duration of the test in seconds.  The second line
+ * lists the average duration of each type of operation in nanoseconds,
+ * or "nan" if the corresponding type of operation was not performed.
+ *
+ * 	./rcu <nreaders> stress
+ * 		Run a stress test with the specified number of readers and
+ * 		one updater.  None of the threads are affinitied to any
+ * 		particular CPU.
+ *
+ * This test produces output as follows:
+ *
+ * n_reads: 114633217  n_updates: 3903415  n_mberror: 0
+ * rcu_stress_count: 114618391 14826 0 0 0 0 0 0 0 0 0
+ *
+ * The first line lists the number of RCU read and update operations
+ * executed, followed by the number of memory-ordering violations
+ * (which will be zero in a correct RCU implementation).  The second
+ * line lists the number of readers observing progressively more stale
+ * data.  A correct RCU implementation will have all but the first two
+ * numbers equal to zero.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2008 Paul E. McKenney, IBM Corporation.
+ */
+
+/*
+ * Test variables.
+ */
+
+DEFINE_PER_THREAD(long long, n_reads_pt);
+DEFINE_PER_THREAD(long long, n_updates_pt);
+
+long long n_reads = 0LL;
+long n_updates = 0L;
+atomic_t nthreadsrunning;
+char argsbuf[64];
+
+#define GOFLAG_INIT 0
+#define GOFLAG_RUN  1
+#define GOFLAG_STOP 2
+
+int goflag __attribute__((__aligned__(CACHE_LINE_SIZE))) = GOFLAG_INIT;
+
+#define RCU_READ_RUN 1000
+
+//MD
+#define RCU_READ_NESTABLE
+
+#ifdef RCU_READ_NESTABLE
+#define rcu_read_lock_nest() rcu_read_lock()
+#define rcu_read_unlock_nest() rcu_read_unlock()
+#else /* #ifdef RCU_READ_NESTABLE */
+#define rcu_read_lock_nest()
+#define rcu_read_unlock_nest()
+#endif /* #else #ifdef RCU_READ_NESTABLE */
+
+#ifndef mark_rcu_quiescent_state
+#define mark_rcu_quiescent_state() do ; while (0)
+#endif /* #ifndef mark_rcu_quiescent_state */
+
+#ifndef put_thread_offline
+#define put_thread_offline()		do ; while (0)
+#define put_thread_online()		do ; while (0)
+#define put_thread_online_delay()	do ; while (0)
+#else /* #ifndef put_thread_offline */
+#define put_thread_online_delay()	synchronize_rcu()
+#endif /* #else #ifndef put_thread_offline */
+
+/*
+ * Performance test.
+ */
+
+void *rcu_read_perf_test(void *arg)
+{
+	/* Reader: spins through batches of empty RCU read-side sections. */
+	long long completed = 0;	/* reads done so far by this thread */
+	int cpu = (long)arg;		/* value handed on to run_on() */
+	int n;
+
+	rcu_register_thread();
+	run_on(cpu);
+	atomic_inc(&nthreadsrunning);	/* perftestrun() waits on this count */
+	while (goflag == GOFLAG_INIT)
+		poll(NULL, 0, 1);
+	mark_rcu_quiescent_state();
+	while (goflag == GOFLAG_RUN) {
+		/* One batch: RCU_READ_RUN back-to-back lock/unlock pairs. */
+		for (n = RCU_READ_RUN; n > 0; n--) {
+			rcu_read_lock();
+			rcu_read_unlock();
+		}
+		completed += RCU_READ_RUN;
+		mark_rcu_quiescent_state();
+	}
+	__get_thread_var(n_reads_pt) += completed;
+	put_thread_offline();
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+void *rcu_update_perf_test(void *arg)
+{
+	long long grace_periods = 0;	/* synchronize_rcu() calls completed */
+
+	atomic_inc(&nthreadsrunning);	/* perftestrun() waits on this count */
+	while (goflag == GOFLAG_INIT)
+		poll(NULL, 0, 1);
+	/* Updater: issue synchronize_rcu() back to back until told to stop. */
+	for (; goflag == GOFLAG_RUN; grace_periods++)
+		synchronize_rcu();
+
+	__get_thread_var(n_updates_pt) += grace_periods;
+	return NULL;
+}
+
+void perftestinit(void)
+{
+	atomic_set(&nthreadsrunning, 0);	/* no test thread has checked in yet */
+	init_per_thread(n_updates_pt, 0LL);	/* update tallies start from 0LL */
+	init_per_thread(n_reads_pt, 0LL);	/* read tallies start from 0LL */
+}
+
+void perftestrun(int nthreads, int nreaders, int nupdaters)
+{
+	int t;
+	int duration = 1;	/* length of the timed window, in seconds (see sleep() below) */
+	/* Start the checked-in threads, time them, print the totals, exit. */
+	smp_mb();
+	while (atomic_read(&nthreadsrunning) < nthreads)
+		poll(NULL, 0, 1);	/* wait until nthreads threads have checked in */
+	goflag = GOFLAG_RUN;	/* releases threads polling on GOFLAG_INIT */
+	smp_mb();
+	sleep(duration);
+	smp_mb();
+	goflag = GOFLAG_STOP;	/* ends the threads' GOFLAG_RUN loops */
+	smp_mb();
+	wait_all_threads();
+	for_each_thread(t) {	/* sum the per-thread counters into the globals */
+		n_reads += per_thread(n_reads_pt, t);
+		n_updates += per_thread(n_updates_pt, t);
+	}
+	printf("n_reads: %lld  n_updates: %ld  nreaders: %d  nupdaters: %d duration: %d\n",
+	       n_reads, n_updates, nreaders, nupdaters, duration);
+	printf("ns/read: %g  ns/update: %g\n",
+	       ((duration * 1000*1000*1000.*(double)nreaders) /
+	        (double)n_reads),
+	       ((duration * 1000*1000*1000.*(double)nupdaters) /
+	        (double)n_updates));
+	exit(0);	/* terminates the process: this function never returns */
+}
+
+void perftest(int nreaders, int cpustride)
+{
+	/* Combined test: nreaders reader threads plus a single updater. */
+	int started = 0;	/* threads created so far */
+
+	perftestinit();
+	while (started < nreaders) {
+		create_thread(rcu_read_perf_test,
+			      (void *)(long)(started * cpustride));
+		started++;
+	}
+	create_thread(rcu_update_perf_test, (void *)(long)(started * cpustride));
+	perftestrun(started + 1, nreaders, 1);
+}
+
+void rperftest(int nreaders, int cpustride)
+{
+	/* Read-only test: nreaders reader threads and no updater. */
+	int i;
+	long arg;
+
+	perftestinit();	/* resets n_reads_pt, n_updates_pt and nthreadsrunning */
+	for (i = 0; i < nreaders; i++) {
+		arg = (long)(i * cpustride);
+		create_thread(rcu_read_perf_test, (void *)arg);
+	}
+	perftestrun(i, nreaders, 0);
+}
+
+void uperftest(int nupdaters, int cpustride)
+{
+	/* Update-only test: nupdaters updater threads and no readers. */
+	int i;
+	long arg;
+
+	perftestinit();	/* resets n_reads_pt, n_updates_pt and nthreadsrunning */
+	for (i = 0; i < nupdaters; i++) {
+		arg = (long)(i * cpustride);
+		create_thread(rcu_update_perf_test, (void *)arg);
+	}
+	perftestrun(i, 0, nupdaters);
+}
+
+/*
+ * Stress test.
+ */
+
+#define RCU_STRESS_PIPE_LEN 10
+
+struct rcu_stress {
+	int pipe_count;	/* zeroed on publication, then bumped once per later update */
+	int mbtest;	/* updater sets 0 then 1 before publishing; readers flag a 0 */
+};
+
+struct rcu_stress rcu_stress_array[RCU_STRESS_PIPE_LEN] = { { 0 } };
+struct rcu_stress *rcu_stress_current;
+int rcu_stress_idx = 0;
+
+int n_mberror = 0;
+DEFINE_PER_THREAD(long long [RCU_STRESS_PIPE_LEN + 1], rcu_stress_count);
+
+int garbage = 0;
+
+void *rcu_read_stress_test(void *arg)
+{
+	int i;
+	int itercnt = 0;
+	struct rcu_stress *p;
+	int pc;
+	/* Stress reader: samples rcu_stress_current and tallies its pipe_count. */
+	rcu_register_thread();
+	while (goflag == GOFLAG_INIT)
+		poll(NULL, 0, 1);
+	mark_rcu_quiescent_state();
+	while (goflag == GOFLAG_RUN) {
+		rcu_read_lock();
+		p = rcu_dereference(rcu_stress_current);
+		if (p->mbtest == 0)	/* counted as a memory-ordering violation */
+			n_mberror++;
+		rcu_read_lock_nest();
+		for (i = 0; i < 100; i++)	/* filler work between the nested lock/unlock */
+			garbage++;
+		rcu_read_unlock_nest();
+		pc = p->pipe_count;
+		rcu_read_unlock();
+		if ((pc > RCU_STRESS_PIPE_LEN) || (pc < 0))	/* out-of-range counts share the last bucket */
+			pc = RCU_STRESS_PIPE_LEN;
+		__get_thread_var(rcu_stress_count)[pc]++;
+		__get_thread_var(n_reads_pt)++;
+		mark_rcu_quiescent_state();
+		if ((++itercnt % 0x1000) == 0) {	/* every 0x1000 passes: offline, delay, online */
+			put_thread_offline();
+			put_thread_online_delay();
+			put_thread_online();
+		}
+	}
+	put_thread_offline();
+	rcu_unregister_thread();
+
+	return (NULL);
+}
+
+void *rcu_update_stress_test(void *arg)
+{
+	int i;
+	struct rcu_stress *p;
+	/* Stress updater: keeps publishing the next rcu_stress_array[] slot. */
+	while (goflag == GOFLAG_INIT)
+		poll(NULL, 0, 1);
+	while (goflag == GOFLAG_RUN) {
+		i = rcu_stress_idx + 1;	/* advance to the next slot, wrapping at the end */
+		if (i >= RCU_STRESS_PIPE_LEN)
+			i = 0;
+		p = &rcu_stress_array[i];
+		p->mbtest = 0;
+		smp_mb();
+		p->pipe_count = 0;
+		p->mbtest = 1;	/* readers count mbtest == 0 in n_mberror */
+		rcu_assign_pointer(rcu_stress_current, p);	/* make p the element readers pick up */
+		rcu_stress_idx = i;
+		for (i = 0; i < RCU_STRESS_PIPE_LEN; i++)	/* age every slot except the current one */
+			if (i != rcu_stress_idx)
+				rcu_stress_array[i].pipe_count++;
+		synchronize_rcu();
+		n_updates++;
+	}
+	return NULL;
+}
+
+void *rcu_fake_update_stress_test(void *arg)
+{
+	/* Calls synchronize_rcu() in a loop; touches none of the stress data. */
+	for (; goflag == GOFLAG_INIT; )
+		poll(NULL, 0, 1);
+	for (; goflag == GOFLAG_RUN; poll(NULL, 0, 1))
+		synchronize_rcu();
+
+	return NULL;
+}
+
+void stresstest(int nreaders)
+{
+	int i;
+	int t;
+	long long *p;
+	long long sum;
+	/* Run nreaders stress readers, one updater and five fake updaters. */
+	init_per_thread(n_reads_pt, 0LL);
+	for_each_thread(t) {	/* clear each thread's rcu_stress_count[] buckets */
+		p = &per_thread(rcu_stress_count,t)[0];
+		for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++)
+			p[i] = 0LL;
+	}
+	rcu_stress_current = &rcu_stress_array[0];	/* slot 0 is the initial element */
+	rcu_stress_current->pipe_count = 0;
+	rcu_stress_current->mbtest = 1;
+	for (i = 0; i < nreaders; i++)
+		create_thread(rcu_read_stress_test, NULL);
+	create_thread(rcu_update_stress_test, NULL);
+	for (i = 0; i < 5; i++)
+		create_thread(rcu_fake_update_stress_test, NULL);
+	smp_mb();
+	goflag = GOFLAG_RUN;
+	smp_mb();
+	sleep(10);	/* the stress run lasts a fixed 10 seconds */
+	smp_mb();
+	goflag = GOFLAG_STOP;
+	smp_mb();
+	wait_all_threads();
+	for_each_thread(t)
+		n_reads += per_thread(n_reads_pt, t);
+	printf("n_reads: %lld  n_updates: %ld  n_mberror: %d\n",
+	       n_reads, n_updates, n_mberror);
+	printf("rcu_stress_count:");
+	for (i = 0; i <= RCU_STRESS_PIPE_LEN; i++) {	/* one total per pipe_count bucket */
+		sum = 0LL;
+		for_each_thread(t) {
+			sum += per_thread(rcu_stress_count, t)[i];
+		}
+		printf(" %lld", sum);
+	}
+	printf("\n");
+	exit(0);	/* terminates the process: this function never returns */
+}
+
+/*
+ * Mainprogram.
+ */
+
+void usage(int argc, char *argv[])
+{
+	fprintf(stderr, "Usage: %s [nreaders [ perf | rperf | uperf | stress ] [ cpustride ] ]\n", argv[0]);
+	exit(-1);	/* synopsis lists every test name main() compares argv[2] against */
+}
+
+int main(int argc, char *argv[])
+{
+	int nreaders = 1;
+	int cpustride = 1;
+	/* Dispatch on argv: argv[1] = thread count, argv[2] = test, argv[3] = stride. */
+	smp_init();
+	//rcu_init();
+
+#ifdef DEBUG_YIELD
+	yield_active |= YIELD_READ;
+	yield_active |= YIELD_WRITE;
+#endif
+
+	if (argc > 1) {
+		nreaders = strtoul(argv[1], NULL, 0);
+		if (argc == 2)
+			perftest(nreaders, cpustride);	/* ends in exit(0), so argv[2] below is never read */
+		if (argc > 3)
+			cpustride = strtoul(argv[3], NULL, 0);
+		if (strcmp(argv[2], "perf") == 0)
+			perftest(nreaders, cpustride);
+		else if (strcmp(argv[2], "rperf") == 0)
+			rperftest(nreaders, cpustride);
+		else if (strcmp(argv[2], "uperf") == 0)
+			uperftest(nreaders, cpustride);	/* argv[1] is the updater count here */
+		else if (strcmp(argv[2], "stress") == 0)
+			stresstest(nreaders);
+		usage(argc, argv);	/* only reached for an unrecognised test name */
+	}
+	perftest(nreaders, cpustride);
+	return 0;
+}
diff --git a/tests/runall.sh b/tests/runall.sh
new file mode 100755
index 0000000..e2b47dc
--- /dev/null
+++ b/tests/runall.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+#run all tests
+
+#set to number of active CPUS
+NUM_CPUS=8
+
+#extra options, e.g. for setting affinity on even CPUs :
+#EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
+
+#ppc64 striding, use with NUM_CPUS=8
+
+#stride 1
+#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
+#stride 2
+#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
+#stride 4
+#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
+#stride 8
+#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
+
+#Vary update fraction
+#x: vary update fraction from 0 to 0.0001
+  #fix number of readers and reader C.S. length, vary delay between updates
+#y: ops/s
+
+rm -f runall.log
+rm -fr runall.detail.log
+
+
+echo Executing batch RCU test
+
+DURATION=10
+BATCH_ARRAY="1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536
+	     131072 262144"
+NR_WRITERS=$((${NUM_CPUS} / 2))
+
+rm -f batch-rcu.log
+
+NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
+for BATCH_SIZE in ${BATCH_ARRAY}; do
+	echo "./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log" >> runall.log
+	./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log
+done
+
+#setting gc each 32768. ** UPDATE FOR YOUR ARCHITECTURE BASED ON TEST ABOVE **
+EXTRA_OPTS+="-b 32768"
+
+echo Executing update fraction test
+
+DURATION=10
+WDELAY_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768
+              65536 131072 262144 524288 1048576 2097152 4194304 8388608
+              16777216 33554432 67108864 134217728"
+NR_WRITERS=$((${NUM_CPUS} / 2))
+
+rm -f update-fraction.log
+
+NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
+for WDELAY in ${WDELAY_ARRAY}; do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log
+done
+
+#Test scalability :
+# x: vary number of readers from 0 to num cpus
+# y: ops/s
+# 0 writer.
+
+echo Executing scalability test
+
+NR_WRITERS=0
+DURATION=10
+
+rm -f scalability.log
+
+for NR_READERS in $(seq 1 ${NUM_CPUS}); do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log
+done
+
+
+# x: Vary reader C.S. length from 0 to 100 us
+# y: ops/s
+# 8 readers
+# 0 writers
+
+echo Executing reader C.S. length test
+
+NR_READERS=${NUM_CPUS}
+NR_WRITERS=0
+DURATION=10
+#in loops.
+READERCSLEN_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152"
+
+rm -f readercslen.log
+
+for READERCSLEN in ${READERCSLEN_ARRAY}; do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log
+done
diff --git a/tests/runpaul-phase1.sh b/tests/runpaul-phase1.sh
new file mode 100755
index 0000000..d2c8649
--- /dev/null
+++ b/tests/runpaul-phase1.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+#run all tests
+
+#set to number of active CPUS
+NUM_CPUS=64
+
+#extra options, e.g. for setting affinity on even CPUs :
+EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
+
+#ppc64 striding, use with NUM_CPUS=8
+
+#stride 1
+#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
+#stride 2
+#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
+#stride 4
+#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
+#stride 8
+#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
+
+#Vary update fraction
+#x: vary update fraction from 0 to 0.0001
+  #fix number of readers and reader C.S. length, vary delay between updates
+#y: ops/s
+
+rm -f runall.log
+rm -fr runall.detail.log
+
+
+echo Executing batch RCU test
+
+DURATION=10
+BATCH_ARRAY="1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536
+	     131072 262144"
+NR_WRITERS=$((${NUM_CPUS} / 2))
+
+rm -f batch-rcu.log
+
+NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
+for BATCH_SIZE in ${BATCH_ARRAY}; do
+	echo "./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log" >> runall.log
+	./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log
+done
diff --git a/tests/runpaul-phase2.sh b/tests/runpaul-phase2.sh
new file mode 100755
index 0000000..0f6bd91
--- /dev/null
+++ b/tests/runpaul-phase2.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+
+#run all tests
+
+#set to number of active CPUS
+NUM_CPUS=64
+
+#extra options, e.g. for setting affinity on even CPUs :
+EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
+
+#ppc64 striding, use with NUM_CPUS=8
+
+#stride 1
+#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
+#stride 2
+#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
+#stride 4
+#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
+#stride 8
+#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
+
+#Vary update fraction
+#x: vary update fraction from 0 to 0.0001
+  #fix number of readers and reader C.S. length, vary delay between updates
+#y: ops/s
+
+rm -f runall.log
+rm -fr runall.detail.log
+
+#setting gc each 32768. ** UPDATE FOR YOUR ARCHITECTURE BASED ON PHASE 1 RESULT **
+EXTRA_OPTS+="-b 32768"
+
+echo Executing update fraction test
+
+DURATION=10
+WDELAY_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768
+              65536 131072 262144 524288 1048576 2097152 4194304 8388608
+              16777216 33554432 67108864 134217728"
+NR_WRITERS=$((${NUM_CPUS} / 2))
+
+rm -f update-fraction.log
+
+NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
+for WDELAY in ${WDELAY_ARRAY}; do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log
+done
diff --git a/tests/runpaul-phase3.sh b/tests/runpaul-phase3.sh
new file mode 100755
index 0000000..1956062
--- /dev/null
+++ b/tests/runpaul-phase3.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+#run all tests
+
+#set to number of active CPUS
+NUM_CPUS=64
+
+#extra options, e.g. for setting affinity on even CPUs :
+EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
+
+#ppc64 striding, use with NUM_CPUS=8
+
+#stride 1
+#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
+#stride 2
+#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
+#stride 4
+#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
+#stride 8
+#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
+
+#Vary update fraction
+#x: vary update fraction from 0 to 0.0001
+  #fix number of readers and reader C.S. length, vary delay between updates
+#y: ops/s
+
+rm -f runall.log
+rm -fr runall.detail.log
+
+#setting gc each 32768. ** UPDATE FOR YOUR ARCHITECTURE BASED ON PHASE 1 RESULT **
+EXTRA_OPTS+="-b 32768"
+
+#Test scalability :
+# x: vary number of readers from 0 to num cpus
+# y: ops/s
+# 0 writer.
+
+echo Executing scalability test
+
+NR_WRITERS=0
+DURATION=10
+
+rm -f scalability.log
+
+for NR_READERS in $(seq 1 ${NUM_CPUS}); do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log
+done
+
+
diff --git a/tests/runpaul-phase4.sh b/tests/runpaul-phase4.sh
new file mode 100755
index 0000000..ede402c
--- /dev/null
+++ b/tests/runpaul-phase4.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+#run all tests
+
+#set to number of active CPUS
+export NUM_CPUS=8
+
+#extra options, e.g. for setting affinity on even CPUs :
+#EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
+
+#ppc64 striding, use with NUM_CPUS=8
+
+rm -f *.log
+
+#stride 1
+export EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
+sh subphase4.sh $*
+mkdir ppc64-8cores-stride1
+mv *.log ppc64-8cores-stride1/
+
+
+#stride 2
+export EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
+sh subphase4.sh $*
+mkdir ppc64-8cores-stride2
+mv *.log ppc64-8cores-stride2/
+
+
+#stride 4
+export EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
+sh subphase4.sh $*
+mkdir ppc64-8cores-stride4
+mv *.log ppc64-8cores-stride4/
+
+
+#stride 8
+export EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
+sh subphase4.sh $*
+mkdir ppc64-8cores-stride8
+mv *.log ppc64-8cores-stride8/
diff --git a/tests/runpaul-phase5.sh b/tests/runpaul-phase5.sh
new file mode 100644
index 0000000..bb4bfe7
--- /dev/null
+++ b/tests/runpaul-phase5.sh
@@ -0,0 +1,11 @@
+# test run after write-size update
+
+sh runpaul-phase1.sh
+mkdir -p runpaul-phase1	# -p: a directory left by an earlier run is not an error
+mv *.log runpaul-phase1/
+
+sh runpaul-phase2.sh
+mkdir -p runpaul-phase2
+mv *.log runpaul-phase2/
+
+sh runpaul-phase4.sh
diff --git a/tests/runpaul-phase6.sh b/tests/runpaul-phase6.sh
new file mode 100644
index 0000000..5f65072
--- /dev/null
+++ b/tests/runpaul-phase6.sh
@@ -0,0 +1,7 @@
+sh runpaul-phase1.sh
+mkdir -p runpaul-phase1	# -p: a directory left by an earlier run is not an error
+mv *.log runpaul-phase1/
+
+sh runpaul-phase2.sh
+mkdir -p runpaul-phase2
+mv *.log runpaul-phase2/
diff --git a/tests/runtests-batch.sh b/tests/runtests-batch.sh
new file mode 100755
index 0000000..67e019b
--- /dev/null
+++ b/tests/runtests-batch.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+#for a in test_urcu_gc test_urcu_gc_mb test_qsbr_gc; do
+for a in test_urcu_gc; do
+	echo "./${a} $*" | tee -a runall.detail.log
+	/usr/bin/time --append --output runall.detail.log "./${a}" "$@"	# "$@" keeps each argument as one word
+done
+
diff --git a/tests/runtests.sh b/tests/runtests.sh
new file mode 100755
index 0000000..981aef1
--- /dev/null
+++ b/tests/runtests.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+for a in test_urcu_gc test_urcu_gc_mb test_urcu test_urcu_mb \
+			test_urcu_lgc test_qsbr_lgc test_urcu_lgc_mb \
+			test_qsbr test_qsbr_gc test_rwlock test_perthreadlock \
+			test_mutex; do
+	echo "./${a} $*" | tee -a runall.detail.log
+	/usr/bin/time --append --output runall.detail.log "./${a}" "$@"	# "$@" keeps each argument as one word
+done
+
diff --git a/tests/subphase4.sh b/tests/subphase4.sh
new file mode 100755
index 0000000..b69ffbf
--- /dev/null
+++ b/tests/subphase4.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+#run all tests
+
+#set to number of active CPUS
+#NUM_CPUS=8
+
+#extra options, e.g. for setting affinity on even CPUs :
+#EXTRA_OPTS=$(for a in $(seq 0 2 127); do echo -n "-a ${a} "; done)
+
+#ppc64 striding, use with NUM_CPUS=8
+
+#stride 1
+#EXTRA_OPTS=$(for a in $(seq 0 2 15); do echo -n "-a ${a} "; done)
+#stride 2
+#EXTRA_OPTS=$(for a in $(seq 0 4 31); do echo -n "-a ${a} "; done)
+#stride 4
+#EXTRA_OPTS=$(for a in $(seq 0 8 63); do echo -n "-a ${a} "; done)
+#stride 8
+#EXTRA_OPTS=$(for a in $(seq 0 16 127); do echo -n "-a ${a} "; done)
+
+#Vary update fraction
+#x: vary update fraction from 0 to 0.0001
+  #fix number of readers and reader C.S. length, vary delay between updates
+#y: ops/s
+
+rm -f runall.log
+rm -fr runall.detail.log
+
+
+echo Executing batch RCU test
+
+DURATION=10
+BATCH_ARRAY="1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536
+	     131072 262144"
+NR_WRITERS=$((${NUM_CPUS} / 2))
+
+rm -f batch-rcu.log
+
+NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
+for BATCH_SIZE in ${BATCH_ARRAY}; do
+	echo "./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log" >> runall.log
+	./runtests-batch.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d 0 -b ${BATCH_SIZE} ${EXTRA_OPTS} | tee -a batch-rcu.log
+done
+
+#setting gc each 4096. ** UPDATE FOR YOUR ARCHITECTURE BASED ON TEST ABOVE **
+EXTRA_OPTS+="-b 32768"
+
+echo Executing update fraction test
+
+DURATION=10
+WDELAY_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768
+              65536 131072 262144 524288 1048576 2097152 4194304 8388608
+              16777216 33554432 67108864 134217728"
+NR_WRITERS=$((${NUM_CPUS} / 2))
+
+rm -f update-fraction.log
+
+NR_READERS=$((${NUM_CPUS} - ${NR_WRITERS}))
+for WDELAY in ${WDELAY_ARRAY}; do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} -d ${WDELAY} ${EXTRA_OPTS} | tee -a update-fraction.log
+done
+
+#Test scalability :
+# x: vary number of readers from 0 to num cpus
+# y: ops/s
+# 0 writer.
+
+echo Executing scalability test
+
+NR_WRITERS=0
+DURATION=10
+
+rm -f scalability.log
+
+for NR_READERS in $(seq 1 ${NUM_CPUS}); do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS}| tee -a scalability.log
+done
+
+
+# x: Vary reader C.S. length from 0 to 100 us
+# y: ops/s
+# 8 readers
+# 0 writers
+
+echo Executing reader C.S. length test
+
+NR_READERS=${NUM_CPUS}
+NR_WRITERS=0
+DURATION=10
+#in loops.
+READERCSLEN_ARRAY="0 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152"
+
+rm -f readercslen.log
+
+for READERCSLEN in ${READERCSLEN_ARRAY}; do
+	echo "./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log" >> runall.log
+	./runtests.sh ${NR_READERS} ${NR_WRITERS} ${DURATION} ${EXTRA_OPTS} -c ${READERCSLEN} | tee -a readercslen.log
+done
diff --git a/tests/test_looplen.c b/tests/test_looplen.c
new file mode 100644
index 0000000..5006951
--- /dev/null
+++ b/tests/test_looplen.c
@@ -0,0 +1,89 @@
+/*
+ * test_looplen.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+#if defined(_syscall0)	/* use the _syscall0 stub generator when it is defined */
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)	/* otherwise invoke the gettid system call by number */
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else	/* neither is available: fall back to the process id */
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../urcu.h"
+
+static inline void loop_sleep(unsigned long l)
+{
+	for (; l != 0; l--)	/* busy-wait: exactly l calls to cpu_relax() */
+		cpu_relax();
+}
+
+#define LOOPS 1048576
+#define TESTS 10
+
+int main(int argc, char **argv)
+{
+	unsigned long i;
+	cycles_t time1, time2;
+	cycles_t time_tot = 0;
+	double cpl;
+	/* Calibration: average cycle cost of one loop_sleep() iteration. */
+	for (i = 0; i < TESTS; i++) {
+		time1 = get_cycles();
+		loop_sleep(LOOPS);
+		time2 = get_cycles();
+		time_tot += time2 - time1;
+	}
+	cpl = ((double)time_tot) / (double)TESTS / (double)LOOPS;
+	printf("CALIBRATION : %g cycles per loop\n", cpl);
+	/* cycles_t is declared in arch.h; cast so the argument always matches %llu. */
+	printf("time_tot = %llu, LOOPS = %d, TESTS = %d\n",
+	       (unsigned long long)time_tot, LOOPS, TESTS);
+
+	return 0;
+}
diff --git a/tests/test_mutex.c b/tests/test_mutex.c
new file mode 100644
index 0000000..e94819a
--- /dev/null
+++ b/tests/test_mutex.c
@@ -0,0 +1,392 @@
+/*
+ * test_mutex.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+#if defined(_syscall0)	/* use the _syscall0 stub generator when it is defined */
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)	/* otherwise invoke the gettid system call by number */
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else	/* neither is available: fall back to the process id */
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../urcu.h"
+
+struct test_array {
+	int a;	/* writers store 0 then 8 under the lock; readers assert it is 8 */
+};
+
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* held around every test_array access in thr_reader/thr_writer */
+
+static volatile int test_go, test_stop;
+
+static unsigned long wdelay;
+
+static volatile struct test_array test_array = { 8 };
+
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+static inline void loop_sleep(unsigned long l)
+{
+	for (; l != 0; l--)	/* busy-wait: exactly l calls to cpu_relax() */
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+static unsigned int cpu_affinities[NR_CPUS];
+static unsigned int next_aff = 0;
+static int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	int cpu;
+	int ret;
+	/* Pin the caller to the next cpu_affinities[] entry; no-op unless use_affinity is set. */
+	if (!use_affinity)
+		return;
+	/* pthread_mutex_*() return the error number instead of setting errno. */
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex lock: %s\n", strerror(ret));
+		exit(-1);
+	}
+	cpu = cpu_affinities[next_aff++];
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n", strerror(ret));
+		exit(-1);
+	}
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+/*
+ * returns 0 if test should end.
+ */
+static int test_duration_write(void)
+{
+	return test_stop == 0;	/* non-zero while writers should keep going */
+}
+
+static int test_duration_read(void)
+{
+	return test_stop == 0;	/* non-zero while readers should keep going */
+}
+
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+static
+unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+static
+unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
+
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Acquire rcu_copy_mutex; on failure report the pthread error and exit(-1). */
+void rcu_copy_mutex_lock(void)
+{
+	int ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {	/* pthread calls return the error number; errno is not set */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n", strerror(ret));
+		exit(-1);
+	}
+}
+
+/* Release rcu_copy_mutex; on failure report the pthread error and exit(-1). */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret = pthread_mutex_unlock(&rcu_copy_mutex);
+
+	if (ret) {	/* pthread calls return the error number; errno is not set */
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n", strerror(ret));
+		exit(-1);
+	}
+}
+
+void *thr_reader(void *data)
+{
+	unsigned long tidx = (unsigned long)data;
+	/* Reader thread: checks test_array under the mutex until test_stop is set. */
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	while (!test_go)	/* spin until test_go becomes non-zero */
+	{
+	}
+
+	for (;;) {
+		pthread_mutex_lock(&lock);
+		assert(test_array.a == 8);
+		if (unlikely(rduration))	/* optional busy-wait while holding the lock */
+			loop_sleep(rduration);
+		pthread_mutex_unlock(&lock);
+		nr_reads++;
+		if (unlikely(!test_duration_read()))
+			break;
+	}
+
+	tot_nr_reads[tidx] = nr_reads;	/* store this thread's count in slot tidx */
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+void *thr_writer(void *data)
+{
+	unsigned long wtidx = (unsigned long)data;
+	/* Writer thread: rewrites test_array under the mutex until test_stop is set. */
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	while (!test_go)	/* spin until test_go becomes non-zero */
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+		pthread_mutex_lock(&lock);
+		test_array.a = 0;	/* transient value; readers assert they only ever see 8 */
+		test_array.a = 8;
+		pthread_mutex_unlock(&lock);
+		nr_writes++;
+		if (unlikely(!test_duration_write()))
+			break;
+		if (unlikely(wdelay))	/* optional busy-wait between two updates */
+			loop_sleep(wdelay);
+	}
+
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	tot_nr_writes[wtidx] = nr_writes;	/* store this thread's count in slot wtidx */
+	return ((void*)2);
+}
+
+/* Print the command-line synopsis to stdout (argc is unused). */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	printf(" [-d delay] (writer period (in loops))");	/* fed to loop_sleep() */
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)\n");
+}
+
+/*
+ * Parse "nr_readers nr_writers duration" plus options, run reader and writer
+ * threads for duration seconds, then print the aggregated operation counts.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+	int i, a;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+	smp_mb();
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	/* Thread handles and the per-thread result slots filled in at thread exit. */
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	tot_nr_reads = malloc(sizeof(*tot_nr_reads) * nr_readers);
+	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
+
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+
+	test_go = 1;
+
+	sleep(duration);
+
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += tot_nr_reads[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += tot_nr_writes[i];
+	}
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes);
+
+	free(tid_reader);
+	free(tid_writer);
+	free(tot_nr_reads);
+	free(tot_nr_writes);
+	return 0;
+}
diff --git a/tests/test_perthreadlock.c b/tests/test_perthreadlock.c
new file mode 100644
index 0000000..7402f01
--- /dev/null
+++ b/tests/test_perthreadlock.c
@@ -0,0 +1,403 @@
+/*
+ * test_perthreadlock.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../urcu.h"
+
+struct test_array {
+	int a;
+};
+
+struct per_thread_lock {
+	pthread_mutex_t lock;
+} __attribute__((aligned(CACHE_LINE_SIZE)));	/* cache-line aligned */
+
+static struct per_thread_lock *per_thread_lock;
+
+static volatile int test_go, test_stop;
+
+static unsigned long wdelay;
+
+static volatile struct test_array test_array = { 8 };
+
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+static inline void loop_sleep(unsigned long l)
+{
+	for (; l != 0; l--)	/* busy-wait: l calls to cpu_relax() */
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+static unsigned int cpu_affinities[NR_CPUS];
+static unsigned int next_aff = 0;
+static int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Pin the calling thread to the next CPU given with -a; no-op without -a. */
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	int cpu, ret;
+
+	if (!use_affinity)
+		return;
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {	/* pthread calls return the error code, errno is not set */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n", strerror(ret));
+		exit(-1);
+	}
+	cpu = cpu_affinities[next_aff++];
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n", strerror(ret));
+		exit(-1);
+	}
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))	/* best effort: warn only */
+		perror("sched_setaffinity");
+}
+
+/*
+ * Writer-side continuation check: returns 0 once test_stop has been set.
+ */
+static int test_duration_write(void)
+{
+	return test_stop == 0;
+}
+/* Reader-side continuation check: returns 0 once test_stop has been set. */
+static int test_duration_read(void)
+{
+	return test_stop == 0;
+}
+
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+static
+unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+static
+unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_reads;
+
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Lock rcu_copy_mutex; report the returned pthread error code and exit(-1). */
+void rcu_copy_mutex_lock(void)
+{
+	int ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {	/* errno is not set by pthread calls: decode ret instead */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n", strerror(ret));
+		exit(-1);
+	}
+}
+
+/* Unlock rcu_copy_mutex; report the returned pthread error code and exit(-1). */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret = pthread_mutex_unlock(&rcu_copy_mutex);
+
+	if (ret) {	/* errno is not set by pthread calls: decode ret instead */
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n", strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Reader thread body; data carries the reader index, which selects both the
+ * per-thread lock taken around each check of test_array.a and tot_nr_reads[].
+ */
+void *thr_reader(void *data)
+{
+	unsigned long slot = (unsigned long)data;
+	pthread_mutex_t *mine = &per_thread_lock[slot].lock;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	set_affinity();
+	while (!test_go)
+		;	/* busy-wait for the start flag */
+	for (;;) {
+		pthread_mutex_lock(mine);
+		assert(test_array.a == 8);
+		if (unlikely(rduration))
+			loop_sleep(rduration);
+		pthread_mutex_unlock(mine);
+		nr_reads++;
+		if (unlikely(!test_duration_read()))
+			break;
+	}
+
+	tot_nr_reads[slot] = nr_reads;
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return (void *)1;
+}
+
+/*
+ * Writer thread body; data carries the writer index used for tot_nr_writes[].
+ * Each update takes every reader's lock in ascending index order, sets
+ * test_array.a to 0 then back to 8, and drops the locks in reverse order.
+ */
+void *thr_writer(void *data)
+{
+	unsigned long slot = (unsigned long)data;
+	long r;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	set_affinity();
+	while (!test_go)
+		;	/* busy-wait for the start flag */
+	smp_mb();
+
+	for (;;) {
+		for (r = 0; r < nr_readers; r++)
+			pthread_mutex_lock(&per_thread_lock[r].lock);
+		test_array.a = 0;
+		test_array.a = 8;
+		for (r = (long)nr_readers - 1; r >= 0; r--)
+			pthread_mutex_unlock(&per_thread_lock[r].lock);
+		nr_writes++;
+		if (unlikely(!test_duration_write()))
+			break;
+		if (unlikely(wdelay))
+			loop_sleep(wdelay);
+	}
+
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	tot_nr_writes[slot] = nr_writes;
+	return (void *)2;
+}
+
+/* Print the command-line synopsis to stdout (argc is unused). */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	printf(" [-d delay] (writer period (in loops))");	/* fed to loop_sleep() */
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)\n");
+}
+
+/*
+ * Parse "nr_readers nr_writers duration" plus options, run reader and writer
+ * threads for duration seconds, then print the aggregated operation counts.
+ */
+int main(int argc, char **argv)
+{
+	int err, i, a;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+	smp_mb();
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2 || next_aff >= NR_CPUS) {	/* or table full */
+				show_usage(argc, argv);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	tot_nr_reads = malloc(sizeof(*tot_nr_reads) * nr_readers);
+	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
+	per_thread_lock = malloc(sizeof(*per_thread_lock) * nr_readers);
+	/* malloc() memory is not a valid mutex: initialize before any thread runs */
+	for (i = 0; i < nr_readers; i++)
+		pthread_mutex_init(&per_thread_lock[i].lock, NULL);
+
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+	test_go = 1;
+	sleep(duration);
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += tot_nr_reads[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += tot_nr_writes[i];
+	}
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes);
+
+	for (i = 0; i < nr_readers; i++)
+		pthread_mutex_destroy(&per_thread_lock[i].lock);
+	free(tid_reader);
+	free(tid_writer);
+	free(tot_nr_reads);
+	free(tot_nr_writes);
+	free(per_thread_lock);
+	return 0;
+}
diff --git a/tests/test_perthreadlock_timing.c b/tests/test_perthreadlock_timing.c
new file mode 100644
index 0000000..d5bd912
--- /dev/null
+++ b/tests/test_perthreadlock_timing.c
@@ -0,0 +1,214 @@
+/*
+ * test_perthreadlock_timing.c
+ *
+ * Per thread locks - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <pthread.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#include "../urcu.h"
+
+struct test_array {
+	int a;
+};
+
+static struct test_array test_array = { 8 };
+
+struct per_thread_lock {
+	pthread_mutex_t lock;
+} __attribute__((aligned(CACHE_LINE_SIZE)));	/* cache-line aligned */
+
+static struct per_thread_lock *per_thread_lock;
+
+#define OUTER_READ_LOOP	200U
+#define INNER_READ_LOOP	100000U
+#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
+
+#define OUTER_WRITE_LOOP 10U
+#define INNER_WRITE_LOOP 200U
+#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
+
+static int num_read;
+static int num_write;
+
+#define NR_READ num_read
+#define NR_WRITE num_write
+
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+
+/*
+ * Timed reader: arg carries the reader index. Measures, in get_cycles() units,
+ * READ_LOOP lock/check/unlock rounds on its own lock into reader_time[].
+ */
+void *thr_reader(void *arg)
+{
+	long self = (long)arg;
+	cycles_t begin, end;
+	int outer, inner;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	begin = get_cycles();
+	for (outer = 0; outer < OUTER_READ_LOOP; outer++)
+		for (inner = 0; inner < INNER_READ_LOOP; inner++) {
+			pthread_mutex_lock(&per_thread_lock[self].lock);
+			assert(test_array.a == 8);
+			pthread_mutex_unlock(&per_thread_lock[self].lock);
+		}
+	end = get_cycles();
+	reader_time[self] = end - begin;
+	sleep(2);
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return (void *)1;
+}
+
+/*
+ * Timed writer: arg carries the writer index. For WRITE_LOOP rounds, takes all
+ * reader locks, stores test_array.a, releases them in reverse order, and adds
+ * the get_cycles() delta of the round to writer_time[].
+ */
+void *thr_writer(void *arg)
+{
+	cycles_t begin, end;
+	int outer, inner;
+	long r;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+	for (outer = 0; outer < OUTER_WRITE_LOOP; outer++)
+		for (inner = 0; inner < INNER_WRITE_LOOP; inner++) {
+			begin = get_cycles();
+			for (r = 0; r < NR_READ; r++)
+				pthread_mutex_lock(&per_thread_lock[r].lock);
+			test_array.a = 8;
+			for (r = NR_READ - 1; r >= 0; r--)
+				pthread_mutex_unlock(&per_thread_lock[r].lock);
+			end = get_cycles();
+			writer_time[(unsigned long)arg] += end - begin;
+			usleep(1);
+		}
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	return (void *)2;
+}
+
+/*
+ * Usage: prog nr_readers nr_writers. Spawns the timed reader and writer
+ * threads, joins them, and prints the average cycles per read and per write.
+ */
+int main(int argc, char **argv)
+{
+	int err, i;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	cycles_t tot_rtime = 0, tot_wtime = 0;
+
+	if (argc < 3) {		/* argv[1] and argv[2] are both required */
+		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
+		exit(-1);
+	}
+	num_read = atoi(argv[1]);
+	num_write = atoi(argv[2]);
+
+	/* calloc: writer threads accumulate into writer_time[] with += */
+	reader_time = calloc(num_read, sizeof(*reader_time));
+	writer_time = calloc(num_write, sizeof(*writer_time));
+	tid_reader = malloc(sizeof(*tid_reader) * num_read);
+	tid_writer = malloc(sizeof(*tid_writer) * num_write);
+
+	printf("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+	per_thread_lock = malloc(sizeof(struct per_thread_lock) * NR_READ);
+	for (i = 0; i < NR_READ; i++)
+		pthread_mutex_init(&per_thread_lock[i].lock, NULL);
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	sleep(10);
+
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_rtime += reader_time[i];
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_wtime += writer_time[i];
+	}
+	printf("Time per read : %g cycles\n",
+	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
+	printf("Time per write : %g cycles\n",
+	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
+	for (i = 0; i < NR_READ; i++)
+		pthread_mutex_destroy(&per_thread_lock[i].lock);
+	free(per_thread_lock);
+	free(reader_time);
+	free(writer_time);
+	free(tid_reader);
+	free(tid_writer);
+	return 0;
+}
diff --git a/tests/test_qsbr.c b/tests/test_qsbr.c
new file mode 100644
index 0000000..89a9423
--- /dev/null
+++ b/tests/test_qsbr.c
@@ -0,0 +1,430 @@
+/*
+ * test_qsbr.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#define _LGPL_SOURCE
+#include "../urcu-qsbr.h"
+
+struct test_array {
+	int a;
+};
+
+static volatile int test_go, test_stop;
+
+static unsigned long wdelay;
+
+static struct test_array *test_rcu_pointer;
+
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+static inline void loop_sleep(unsigned long l)
+{
+	for (; l != 0; l--)	/* busy-wait: l calls to cpu_relax() */
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+static unsigned int cpu_affinities[NR_CPUS];
+static unsigned int next_aff = 0;
+static int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Pin the calling thread to the next CPU given with -a; no-op without -a. */
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	int cpu, ret;
+
+	if (!use_affinity)
+		return;
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {	/* pthread calls return the error code, errno is not set */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n", strerror(ret));
+		exit(-1);
+	}
+	cpu = cpu_affinities[next_aff++];
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n", strerror(ret));
+		exit(-1);
+	}
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))	/* best effort: warn only */
+		perror("sched_setaffinity");
+}
+
+/*
+ * Writer-side continuation check: returns 0 once test_stop has been set.
+ */
+static int test_duration_write(void)
+{
+	return test_stop == 0;
+}
+/* Reader-side continuation check: returns 0 once test_stop has been set. */
+static int test_duration_read(void)
+{
+	return test_stop == 0;
+}
+
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Lock rcu_copy_mutex; report the returned pthread error code and exit(-1). */
+void rcu_copy_mutex_lock(void)
+{
+	int ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {	/* errno is not set by pthread calls: decode ret instead */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n", strerror(ret));
+		exit(-1);
+	}
+}
+
+/* Unlock rcu_copy_mutex; report the returned pthread error code and exit(-1). */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret = pthread_mutex_unlock(&rcu_copy_mutex);
+
+	if (ret) {	/* errno is not set by pthread calls: decode ret instead */
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n", strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * malloc/free are reusing memory areas too quickly, which does not let us
+ * test races appropriately. Use a large circular array for allocations.
+ * ARRAY_SIZE is larger than nr_writers, which ensures we never run over our tail.
+ */
+#define ARRAY_SIZE (1048576 * nr_writers)
+#define ARRAY_POISON 0xDEADBEEF
+static int array_index;
+static struct test_array *test_array;
+
+/*
+ * Hand out the next slot of the circular test_array, under rcu_copy_mutex.
+ */
+static struct test_array *test_array_alloc(void)
+{
+	struct test_array *slot;
+
+	rcu_copy_mutex_lock();
+	slot = &test_array[array_index % ARRAY_SIZE];
+	/* the slot being handed out must hold 0 or the poison value */
+	assert(slot->a == 0 || slot->a == ARRAY_POISON);
+	if (++array_index == ARRAY_SIZE)
+		array_index = 0;
+	rcu_copy_mutex_unlock();
+	return slot;
+}
+
+static void test_array_free(struct test_array *ptr)
+{
+	if (ptr != NULL) {	/* NULL is ignored; otherwise poison the slot */
+		rcu_copy_mutex_lock();
+		ptr->a = ARRAY_POISON;
+		rcu_copy_mutex_unlock();
+	}
+}
+
+/*
+ * QSBR reader thread: _count points at the slot receiving this thread's read
+ * total. Each pass dereferences test_rcu_pointer inside a read-side critical
+ * section and checks ->a == 8; a quiescent state is reported every 1024 reads.
+ */
+void *thr_reader(void *_count)
+{
+	unsigned long long *result = _count;
+	struct test_array *snap;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+	rcu_register_thread();
+
+	while (!test_go)
+		;	/* busy-wait for the start flag */
+	smp_mb();
+
+	for (;;) {
+		_rcu_read_lock();
+		snap = _rcu_dereference(test_rcu_pointer);
+		debug_yield_read();
+		if (snap)
+			assert(snap->a == 8);
+		if (unlikely(rduration))
+			loop_sleep(rduration);
+		_rcu_read_unlock();
+		nr_reads++;
+		if (unlikely(nr_reads % 1024 == 0))
+			_rcu_quiescent_state();
+		if (unlikely(!test_duration_read()))
+			break;
+	}
+
+	rcu_unregister_thread();
+	*result = nr_reads;
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return (void *)1;
+}
+
+/*
+ * QSBR writer thread: _count points at the slot receiving this thread's write
+ * total. Each pass publishes a fresh element with a == 8, then zeroes and
+ * releases whatever element _rcu_publish_content() handed back.
+ */
+void *thr_writer(void *_count)
+{
+	unsigned long long *result = _count;
+	struct test_array *fresh, *stale;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	set_affinity();
+	while (!test_go)
+		;	/* busy-wait for the start flag */
+	smp_mb();
+
+	for (;;) {
+		fresh = test_array_alloc();
+		fresh->a = 8;
+		stale = _rcu_publish_content(&test_rcu_pointer, fresh);
+		if (stale != NULL)
+			stale->a = 0;
+		test_array_free(stale);
+		nr_writes++;
+		if (unlikely(!test_duration_write()))
+			break;
+		if (unlikely(wdelay))
+			loop_sleep(wdelay);
+	}
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	*result = nr_writes;
+	return (void *)2;
+}
+
+/* Print the command-line synopsis to stdout (argc is unused). */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	printf(" [-d delay] (writer period (in loops))");	/* fed to loop_sleep() */
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)\n");
+}
+
+/*
+ * Parse "nr_readers nr_writers duration" plus options, run reader and writer
+ * threads for duration seconds, then print the aggregated operation counts.
+ */
+int main(int argc, char **argv)
+{
+	int err, i, a;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long *count_reader, *count_writer;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2 || next_aff >= NR_CPUS) {	/* or table full */
+				show_usage(argc, argv);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	test_array = calloc(ARRAY_SIZE, sizeof(*test_array));	/* alloc asserts a == 0 */
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	count_reader = malloc(sizeof(*count_reader) * nr_readers);
+	count_writer = malloc(sizeof(*count_writer) * nr_writers);
+
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     &count_reader[i]);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     &count_writer[i]);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+	test_go = 1;
+	sleep(duration);
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += count_reader[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += count_writer[i];
+	}
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes);
+	test_array_free(test_rcu_pointer);
+	free(test_array);
+	free(tid_reader);
+	free(tid_writer);
+	free(count_reader);
+	free(count_writer);
+	return 0;
+}
diff --git a/tests/test_qsbr_gc.c b/tests/test_qsbr_gc.c
new file mode 100644
index 0000000..004672d
--- /dev/null
+++ b/tests/test_qsbr_gc.c
@@ -0,0 +1,461 @@
+/*
+ * test_qsbr_gc.c
+ *
+ * Userspace RCU library - test program (with batch reclamation)
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/*
+ * gettid() - return an identifier for the calling thread.
+ *
+ * The implementation is chosen at compile time: the _syscall0() macro
+ * when the headers provide it, otherwise a direct syscall(__NR_gettid),
+ * otherwise the process id (with a build-time warning).
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	pid_t tid = syscall(__NR_gettid);
+
+	return tid;
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	pid_t pid = getpid();
+
+	return pid;
+}
+#endif
+
+#define _LGPL_SOURCE
+#include "../urcu-qsbr.h"
+
+/*
+ * Object published through test_rcu_pointer. Writers store 8 in "a",
+ * readers assert that value, and reclamation poisons it to 0 before
+ * freeing.
+ */
+struct test_array {
+	int a;
+};
+
+/* Start / stop flags: main() sets them, the threads poll them. */
+static volatile int test_go, test_stop;
+
+/* Writer delay between two writes (-d), handed to loop_sleep(). */
+static unsigned long wdelay;
+
+/* The RCU-protected pointer exchanged by writers and read by readers. */
+static struct test_array *test_rcu_pointer;
+
+/* Test duration in seconds (argv[3]); main() sleeps for that long. */
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+/* Number of queued pointers that triggers a reclaim pass (-b). */
+static unsigned int reclaim_batch = 1;
+
+/* Per-writer array of pointers waiting to be freed. */
+struct reclaim_queue {
+	void **queue;	/* Beginning of queue */
+	void **head;	/* Insert position */
+};
+
+/* One reclaim queue per writer thread, indexed by writer index. */
+static struct reclaim_queue *pending_reclaims;
+
+
+/* Busy-wait for l iterations, calling cpu_relax() once per iteration. */
+static inline void loop_sleep(unsigned long l)
+{
+	unsigned long remaining;
+
+	for (remaining = l; remaining != 0; remaining--)
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+/* CPU numbers given with -a, in command-line order. */
+static unsigned int cpu_affinities[NR_CPUS];
+/* Next cpu_affinities[] slot: filled by main(), then handed out to threads. */
+static unsigned int next_aff = 0;
+/* Non-zero once at least one -a option was given. */
+static int use_affinity = 0;
+
+/* Serializes the threads' accesses to next_aff. */
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Pin the calling thread to the next CPU listed in cpu_affinities[].
+ *
+ * Does nothing unless -a was used. Slots that were never filled are zero
+ * (static storage), so threads in excess of the -a options land on CPU 0.
+ * Threads in excess of NR_CPUS are left unpinned instead of reading past
+ * the end of the table. Exits with status -1 if the mutex cannot be
+ * taken or released; a sched_setaffinity() failure is only reported.
+ */
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	unsigned int slot;
+	int cpu = 0;
+	int ret;
+
+	if (!use_affinity)
+		return;
+
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {
+		/* pthread calls return the error number, errno is not set. */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	slot = next_aff;
+	if (slot < NR_CPUS) {
+		cpu = cpu_affinities[slot];
+		next_aff = slot + 1;
+	}
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	/* Table exhausted: nothing to pin this thread to. */
+	if (slot >= NR_CPUS)
+		return;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	/* Best effort: keep running unpinned, but say why. */
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		perror("Error in sched_setaffinity");
+}
+
+/*
+ * Tells the writer loop whether to keep going: non-zero while the test
+ * runs, 0 once test_stop has been raised.
+ */
+static int test_duration_write(void)
+{
+	return test_stop == 0;
+}
+
+/* Reader-side counterpart: 1 while the test runs, 0 once test_stop is set. */
+static int test_duration_read(void)
+{
+	return test_stop ? 0 : 1;
+}
+
+/* Per-thread operation counters (__thread: one private copy per thread). */
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+/* Thread counts parsed from argv[1] and argv[2]. */
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+/* Mutex handled by rcu_copy_mutex_lock() / rcu_copy_mutex_unlock(). */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Per-writer write totals, indexed by writer index; allocated in main(). */
+static
+unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+
+
+/*
+ * Acquire rcu_copy_mutex. Exits the program with status -1 on failure.
+ */
+void rcu_copy_mutex_lock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {
+		/*
+		 * pthread_mutex_lock() reports failure through its return
+		 * value, not errno, so decode that value instead of using
+		 * perror().
+		 */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Release rcu_copy_mutex. Exits the program with status -1 on failure.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_unlock(&rcu_copy_mutex);
+	if (ret) {
+		/*
+		 * pthread_mutex_unlock() reports failure through its return
+		 * value, not errno, so decode that value instead of using
+		 * perror().
+		 */
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Reader thread body.
+ *
+ * _count points to the slot that receives this thread's number of reads
+ * when it finishes. Always returns (void *)1.
+ */
+void *thr_reader(void *_count)
+{
+	unsigned long long *count = _count;
+	struct test_array *local_ptr;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	rcu_register_thread();
+
+	/* Spin until main() sets test_go. */
+	while (!test_go)
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+		_rcu_read_lock();
+		local_ptr = _rcu_dereference(test_rcu_pointer);
+		debug_yield_read();
+		/* A non-NULL pointer must still carry the writer's value 8. */
+		if (local_ptr)
+			assert(local_ptr->a == 8);
+		/* Optionally lengthen the read-side critical section (-c). */
+		if (unlikely(rduration))
+			loop_sleep(rduration);
+		_rcu_read_unlock();
+		nr_reads++;
+		/* QS each 1024 reads */
+		if (unlikely((nr_reads & ((1 << 10) - 1)) == 0))
+			_rcu_quiescent_state();
+		if (unlikely(!test_duration_read()))
+			break;
+	}
+
+	rcu_unregister_thread();
+
+	/* Publish the thread-local counter to the caller's slot. */
+	*count = nr_reads;
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+/*
+ * Reclaim every pointer queued by writer wtidx.
+ *
+ * Calls synchronize_rcu() first, then poisons (a = 0) and frees each
+ * queued object, and finally resets the queue to empty.
+ */
+static void rcu_gc_clear_queue(unsigned long wtidx)
+{
+	struct reclaim_queue *rq;
+	void **slot;
+
+	/* Wait for Q.S and empty queue */
+	synchronize_rcu();
+
+	rq = &pending_reclaims[wtidx];
+	slot = rq->queue;
+	while (slot < rq->head) {
+		struct test_array *victim = *slot;
+
+		/* poison */
+		if (victim)
+			victim->a = 0;
+		free(victim);
+		slot++;
+	}
+	rq->head = rq->queue;
+}
+
+/*
+ * Queue "old" on writer wtidx's own reclaim queue (using per-thread
+ * queue) and, once reclaim_batch pointers are pending, reclaim them all
+ * through rcu_gc_clear_queue().
+ */
+static void rcu_gc_reclaim(unsigned long wtidx, void *old)
+{
+	struct reclaim_queue *rq = &pending_reclaims[wtidx];
+
+	/* Queue pointer */
+	*rq->head++ = old;
+
+	/* Batch complete: flush it. */
+	if (unlikely(!(rq->head - rq->queue < reclaim_batch)))
+		rcu_gc_clear_queue(wtidx);
+}
+
+/*
+ * Writer thread body.
+ *
+ * data carries the writer index (cast to a pointer by main()); it selects
+ * this thread's reclaim queue and its tot_nr_writes[] slot. Unless
+ * TEST_LOCAL_GC is defined, each iteration publishes a freshly allocated
+ * object and queues the previous one for reclamation; with TEST_LOCAL_GC
+ * only a NULL pointer is queued. Always returns (void *)2; exits with
+ * status -1 if an allocation fails.
+ */
+void *thr_writer(void *data)
+{
+	unsigned long wtidx = (unsigned long)data;
+#ifdef TEST_LOCAL_GC
+	struct test_array *old = NULL;
+#else
+	struct test_array *new, *old;
+#endif
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	/* Spin until main() sets test_go. */
+	while (!test_go)
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+#ifndef TEST_LOCAL_GC
+		new = malloc(sizeof(*new));
+		/* Do not dereference a failed allocation. */
+		if (!new) {
+			perror("Error allocating test_array");
+			exit(-1);
+		}
+		new->a = 8;
+		old = _rcu_xchg_pointer(&test_rcu_pointer, new);
+#endif
+		rcu_gc_reclaim(wtidx, old);
+		nr_writes++;
+		if (unlikely(!test_duration_write()))
+			break;
+		/* Optional pause between two writes (-d). */
+		if (unlikely(wdelay))
+			loop_sleep(wdelay);
+	}
+
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	/* Publish the thread-local counter for main(). */
+	tot_nr_writes[wtidx] = nr_writes;
+	return ((void*)2);
+}
+
+/*
+ * Print the command-line synopsis on stdout.
+ *
+ * argv[0] is used as the program name; argc is not used.
+ */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	/* -b is parsed by main() but was missing from the synopsis. */
+	printf(" [-b batch] (reclaim batch size)");
+	/* The delay is a loop_sleep() iteration count, not microseconds. */
+	printf(" [-d delay] (writer period (in loops))");
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)");
+	printf("\n");
+}
+
+/*
+ * Entry point: test_qsbr_gc nr_readers nr_writers duration [options]
+ *
+ * Parses the command line, allocates the per-thread bookkeeping and the
+ * per-writer reclaim queues, starts the reader and writer threads, lets
+ * them run for "duration" seconds, then joins them, flushes the reclaim
+ * queues and prints a SUMMARY line.
+ *
+ * Returns 0 on success and -1 on a usage error; exits with status 1 when
+ * a thread cannot be created or joined, or when a reclaim queue cannot be
+ * allocated.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long *count_reader;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+	int i, a;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	/* Options start after the three positional arguments. */
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			/* Never write past the end of cpu_affinities[]. */
+			if (next_aff >= NR_CPUS) {
+				fprintf(stderr,
+					"Too many -a options (max %d)\n",
+					NR_CPUS);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'b':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			reclaim_batch = atol(argv[++i]);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	count_reader = malloc(sizeof(*count_reader) * nr_readers);
+	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
+	pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
+	/*
+	 * Each writer gets a zeroed queue of at least CACHE_LINE_SIZE bytes,
+	 * or reclaim_batch entries when that is larger.
+	 */
+	for (i = 0; i < nr_writers; i++) {
+		if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
+				< CACHE_LINE_SIZE)
+			pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
+		else
+			pending_reclaims[i].queue = calloc(reclaim_batch,
+					sizeof(*pending_reclaims[i].queue));
+		/* Writers store through this pointer unconditionally. */
+		if (!pending_reclaims[i].queue) {
+			perror("Error allocating reclaim queue");
+			exit(1);
+		}
+		pending_reclaims[i].head = pending_reclaims[i].queue;
+	}
+
+	/* Restart at slot 0 so set_affinity() hands the CPUs out in order. */
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     &count_reader[i]);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+
+	/* Release the threads, let them run, then ask them to stop. */
+	test_go = 1;
+
+	sleep(duration);
+
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += count_reader[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += tot_nr_writes[i];
+		/* Free whatever this writer still had queued. */
+		rcu_gc_clear_queue(i);
+	}
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu "
+		"batch %u\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes, reclaim_batch);
+	/*
+	 * The last published object was never queued for reclamation; all
+	 * threads are joined, so it can be freed directly (NULL is fine).
+	 */
+	free(test_rcu_pointer);
+	free(tid_reader);
+	free(tid_writer);
+	free(count_reader);
+	free(tot_nr_writes);
+	for (i = 0; i < nr_writers; i++)
+		free(pending_reclaims[i].queue);
+	free(pending_reclaims);
+
+	return 0;
+}
diff --git a/tests/test_qsbr_timing.c b/tests/test_qsbr_timing.c
new file mode 100644
index 0000000..2a8963a
--- /dev/null
+++ b/tests/test_qsbr_timing.c
@@ -0,0 +1,240 @@
+/*
+ * test_qsbr_timing.c
+ *
+ * Userspace QSBR - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/*
+ * gettid() - return an identifier for the calling thread.
+ *
+ * The implementation is chosen at compile time: the _syscall0() macro
+ * when the headers provide it, otherwise a direct syscall(__NR_gettid),
+ * otherwise the process id (with a build-time warning).
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	pid_t tid = syscall(__NR_gettid);
+
+	return tid;
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	pid_t pid = getpid();
+
+	return pid;
+}
+#endif
+
+#define _LGPL_SOURCE
+#include "../urcu-qsbr.h"
+
+/* Serializes the writers' update of test_rcu_pointer. */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Acquire rcu_copy_mutex. Exits the program with status -1 on failure.
+ */
+void rcu_copy_mutex_lock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {
+		/*
+		 * pthread_mutex_lock() reports failure through its return
+		 * value, not errno, so decode that value instead of using
+		 * perror().
+		 */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Release rcu_copy_mutex. Exits the program with status -1 on failure.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_unlock(&rcu_copy_mutex);
+	if (ret) {
+		/*
+		 * pthread_mutex_unlock() reports failure through its return
+		 * value, not errno, so decode that value instead of using
+		 * perror().
+		 */
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Object published through test_rcu_pointer. Writers store 8 in "a",
+ * readers assert that value, and writers poison it to 0 before freeing.
+ */
+struct test_array {
+	int a;
+};
+
+/* The RCU-protected pointer replaced by writers and read by readers. */
+static struct test_array *test_rcu_pointer;
+
+/* Each reader performs OUTER_READ_LOOP * INNER_READ_LOOP reads. */
+#define OUTER_READ_LOOP	2000U
+#define INNER_READ_LOOP	100000U
+#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
+
+/* Each writer performs OUTER_WRITE_LOOP * INNER_WRITE_LOOP updates. */
+#define OUTER_WRITE_LOOP 10U
+#define INNER_WRITE_LOOP 200U
+#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
+
+/* Thread counts parsed from argv[1] and argv[2]. */
+static int num_read;
+static int num_write;
+
+#define NR_READ num_read
+#define NR_WRITE num_write
+
+/* Per-thread cycle counts, indexed by thread index; allocated in main(). */
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+
+/*
+ * Reader thread body.
+ *
+ * arg carries the reader index (cast to a pointer by main()); the cycles
+ * spent in the whole read loop are stored in reader_time[index].
+ * Always returns (void *)1.
+ */
+void *thr_reader(void *arg)
+{
+	int i, j;
+	struct test_array *local_ptr;
+	cycles_t time1, time2;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	rcu_register_thread();
+
+	/* Timed region: every read-side critical section of this thread. */
+	time1 = get_cycles();
+	for (i = 0; i < OUTER_READ_LOOP; i++) {
+		for (j = 0; j < INNER_READ_LOOP; j++) {
+			_rcu_read_lock();
+			local_ptr = _rcu_dereference(test_rcu_pointer);
+			/* A non-NULL pointer must still carry the value 8. */
+			if (local_ptr) {
+				assert(local_ptr->a == 8);
+			}
+			_rcu_read_unlock();
+		}
+		/* One quiescent state per INNER_READ_LOOP reads. */
+		_rcu_quiescent_state();
+	}
+	time2 = get_cycles();
+
+	rcu_unregister_thread();
+
+	reader_time[(unsigned long)arg] = time2 - time1;
+
+	sleep(2);
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+/*
+ * Writer thread body.
+ *
+ * arg carries the writer index (cast to a pointer by main()). Each of the
+ * OUTER_WRITE_LOOP * INNER_WRITE_LOOP iterations allocates a new object,
+ * publishes it under rcu_copy_mutex, then poisons and frees the previous
+ * one. The cycles spent in all iterations are summed and stored in
+ * writer_time[index]. Always returns (void *)2; exits with status -1 if
+ * an allocation fails.
+ */
+void *thr_writer(void *arg)
+{
+	int i, j;
+	struct test_array *new, *old;
+	cycles_t time1, time2;
+	/*
+	 * Accumulate locally and store once at the end: the writer_time[]
+	 * slot is not guaranteed to start at zero.
+	 */
+	cycles_t total = 0;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
+		for (j = 0; j < INNER_WRITE_LOOP; j++) {
+			time1 = get_cycles();
+			new = malloc(sizeof(struct test_array));
+			/* Do not dereference a failed allocation. */
+			if (!new) {
+				perror("Error allocating test_array");
+				exit(-1);
+			}
+			rcu_copy_mutex_lock();
+			old = test_rcu_pointer;
+			if (old) {
+				assert(old->a == 8);
+			}
+			new->a = 8;
+			old = _rcu_publish_content(&test_rcu_pointer, new);
+			rcu_copy_mutex_unlock();
+			/* can be done after unlock */
+			if (old) {
+				old->a = 0;
+			}
+			free(old);
+			time2 = get_cycles();
+			total += time2 - time1;
+			usleep(1);
+		}
+	}
+	writer_time[(unsigned long)arg] = total;
+
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	return ((void*)2);
+}
+
+/*
+ * Entry point: test_qsbr_timing nr_readers nr_writers
+ *
+ * Starts the requested reader and writer threads, waits for them and
+ * prints the average number of cycles per read and per write.
+ *
+ * Returns 0 on success; exits with status -1 on a usage error and with
+ * status 1 when a thread cannot be created or joined.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	int i;
+	cycles_t tot_rtime = 0;
+	cycles_t tot_wtime = 0;
+
+	/* Both argv[1] and argv[2] are read below. */
+	if (argc < 3) {
+		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
+		exit(-1);
+	}
+	num_read = atoi(argv[1]);
+	num_write = atoi(argv[2]);
+
+	/* Zeroed so per-thread times can be accumulated into the slots. */
+	reader_time = calloc(num_read, sizeof(*reader_time));
+	writer_time = calloc(num_write, sizeof(*writer_time));
+	tid_reader = malloc(sizeof(*tid_reader) * num_read);
+	tid_writer = malloc(sizeof(*tid_writer) * num_write);
+
+	printf("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	/* Each thread receives its index, cast to a pointer. */
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	sleep(10);
+
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_rtime += reader_time[i];
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_wtime += writer_time[i];
+	}
+	/* All threads are joined: the last published object can go. */
+	free(test_rcu_pointer);
+	printf("Time per read : %g cycles\n",
+	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
+	printf("Time per write : %g cycles\n",
+	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
+
+	free(reader_time);
+	free(writer_time);
+	free(tid_reader);
+	free(tid_writer);
+
+	return 0;
+}
diff --git a/tests/test_rwlock.c b/tests/test_rwlock.c
new file mode 100644
index 0000000..c7edd32
--- /dev/null
+++ b/tests/test_rwlock.c
@@ -0,0 +1,383 @@
+/*
+ * test_rwlock.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/*
+ * gettid() - return an identifier for the calling thread.
+ *
+ * The implementation is chosen at compile time: the _syscall0() macro
+ * when the headers provide it, otherwise a direct syscall(__NR_gettid),
+ * otherwise the process id (with a build-time warning).
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	pid_t tid = syscall(__NR_gettid);
+
+	return tid;
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	pid_t pid = getpid();
+
+	return pid;
+}
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../urcu.h"
+
+/* Type of the shared object protected by "lock". */
+struct test_array {
+	int a;
+};
+
+/* Reader-writer lock guarding test_array. */
+pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
+
+/* Start / stop flags: main() sets them, the threads poll them. */
+static volatile int test_go, test_stop;
+
+/* Writer delay between two writes (-d), handed to loop_sleep(). */
+static unsigned long wdelay;
+
+/*
+ * The shared object. Readers assert a == 8 under the read lock; writers
+ * store 0 then 8 under the write lock.
+ */
+static volatile struct test_array test_array = { 8 };
+
+/* Test duration in seconds (argv[3]); main() sleeps for that long. */
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+/* Busy-wait for l iterations, calling cpu_relax() once per iteration. */
+static inline void loop_sleep(unsigned long l)
+{
+	unsigned long remaining;
+
+	for (remaining = l; remaining != 0; remaining--)
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+/* CPU numbers given with -a, in command-line order. */
+static unsigned int cpu_affinities[NR_CPUS];
+/* Next cpu_affinities[] slot: filled by main(), then handed out to threads. */
+static unsigned int next_aff = 0;
+/* Non-zero once at least one -a option was given. */
+static int use_affinity = 0;
+
+/* Serializes the threads' accesses to next_aff. */
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Pin the calling thread to the next CPU listed in cpu_affinities[].
+ *
+ * Does nothing unless -a was used. Slots that were never filled are zero
+ * (static storage), so threads in excess of the -a options land on CPU 0.
+ * Threads in excess of NR_CPUS are left unpinned instead of reading past
+ * the end of the table. Exits with status -1 if the mutex cannot be
+ * taken or released; a sched_setaffinity() failure is only reported.
+ */
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	unsigned int slot;
+	int cpu = 0;
+	int ret;
+
+	if (!use_affinity)
+		return;
+
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {
+		/* pthread calls return the error number, errno is not set. */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	slot = next_aff;
+	if (slot < NR_CPUS) {
+		cpu = cpu_affinities[slot];
+		next_aff = slot + 1;
+	}
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	/* Table exhausted: nothing to pin this thread to. */
+	if (slot >= NR_CPUS)
+		return;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	/* Best effort: keep running unpinned, but say why. */
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		perror("Error in sched_setaffinity");
+}
+
+/*
+ * Tells the writer loop whether to keep going: non-zero while the test
+ * runs, 0 once test_stop has been raised.
+ */
+static int test_duration_write(void)
+{
+	return test_stop == 0;
+}
+
+/* Reader-side counterpart: 1 while the test runs, 0 once test_stop is set. */
+static int test_duration_read(void)
+{
+	return test_stop ? 0 : 1;
+}
+
+/* Per-thread operation counters (__thread: one private copy per thread). */
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+/* Thread counts parsed from argv[1] and argv[2]. */
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+/* Mutex handled by rcu_copy_mutex_lock() / rcu_copy_mutex_unlock(). */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Acquire rcu_copy_mutex. Exits the program with status -1 on failure.
+ */
+void rcu_copy_mutex_lock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {
+		/*
+		 * pthread_mutex_lock() reports failure through its return
+		 * value, not errno, so decode that value instead of using
+		 * perror().
+		 */
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Release rcu_copy_mutex. Exits the program with status -1 on failure.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_unlock(&rcu_copy_mutex);
+	if (ret) {
+		/*
+		 * pthread_mutex_unlock() reports failure through its return
+		 * value, not errno, so decode that value instead of using
+		 * perror().
+		 */
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Reader thread body.
+ *
+ * Repeatedly takes the read lock, checks test_array.a and optionally
+ * spins for rduration loops, until the test is stopped. The number of
+ * reads is stored through _count. Always returns (void *)1.
+ */
+void *thr_reader(void *_count)
+{
+	unsigned long long *result = _count;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	/* Spin until main() sets test_go. */
+	while (test_go == 0)
+		;
+
+	/* At least one read is always performed before checking for stop. */
+	do {
+		pthread_rwlock_rdlock(&lock);
+		assert(test_array.a == 8);
+		if (unlikely(rduration != 0))
+			loop_sleep(rduration);
+		pthread_rwlock_unlock(&lock);
+		nr_reads++;
+	} while (test_duration_read());
+
+	*result = nr_reads;
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+}
+
+/*
+ * Writer thread body.
+ *
+ * Repeatedly takes the write lock and stores 0 then 8 in test_array.a,
+ * pausing for wdelay loops between writes, until the test is stopped.
+ * The number of writes is stored through _count. Always returns
+ * (void *)2.
+ */
+void *thr_writer(void *_count)
+{
+	unsigned long long *result = _count;
+	int keep_going;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	/* Spin until main() sets test_go. */
+	while (test_go == 0)
+		;
+	smp_mb();
+
+	do {
+		pthread_rwlock_wrlock(&lock);
+		test_array.a = 0;
+		test_array.a = 8;
+		pthread_rwlock_unlock(&lock);
+		nr_writes++;
+		keep_going = test_duration_write();
+		/* No delay after the final write. */
+		if (keep_going && unlikely(wdelay != 0))
+			loop_sleep(wdelay);
+	} while (keep_going);
+
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	*result = nr_writes;
+	return ((void*)2);
+}
+
+/*
+ * Print the command-line synopsis on stdout.
+ *
+ * argv[0] is used as the program name; argc is not used.
+ */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	/* The delay is a loop_sleep() iteration count, not microseconds. */
+	printf(" [-d delay] (writer period (in loops))");
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)");
+	printf("\n");
+}
+
+/*
+ * Entry point: test_rwlock nr_readers nr_writers duration [options]
+ *
+ * Parses the command line, starts the reader and writer threads, lets
+ * them run for "duration" seconds, then joins them and prints a SUMMARY
+ * line with the operation counts.
+ *
+ * Returns 0 on success and -1 on a usage error; exits with status 1 when
+ * a thread cannot be created or joined.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long *count_reader, *count_writer;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+	int i, a;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+	smp_mb();
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	/* Options start after the three positional arguments. */
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			/* Never write past the end of cpu_affinities[]. */
+			if (next_aff >= NR_CPUS) {
+				fprintf(stderr,
+					"Too many -a options (max %d)\n",
+					NR_CPUS);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	count_reader = malloc(sizeof(*count_reader) * nr_readers);
+	count_writer = malloc(sizeof(*count_writer) * nr_writers);
+
+	/* Restart at slot 0 so set_affinity() hands the CPUs out in order. */
+	next_aff = 0;
+
+	/* Each thread receives the address of its own result slot. */
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     &count_reader[i]);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     &count_writer[i]);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+
+	/* Release the threads, let them run, then ask them to stop. */
+	test_go = 1;
+
+	sleep(duration);
+
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += count_reader[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += count_writer[i];
+	}
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes);
+
+	free(tid_reader);
+	free(tid_writer);
+	free(count_reader);
+	free(count_writer);
+	return 0;
+}
diff --git a/tests/test_rwlock_timing.c b/tests/test_rwlock_timing.c
new file mode 100644
index 0000000..5bc93d3
--- /dev/null
+++ b/tests/test_rwlock_timing.c
@@ -0,0 +1,197 @@
+/*
+ * test_rwlock_timing.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <pthread.h>
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/*
+ * gettid() - return an identifier for the calling thread.
+ *
+ * The implementation is chosen at compile time: the _syscall0() macro
+ * when the headers provide it, otherwise a direct syscall(__NR_gettid),
+ * otherwise the process id (with a build-time warning).
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	pid_t tid = syscall(__NR_gettid);
+
+	return tid;
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	pid_t pid = getpid();
+
+	return pid;
+}
+#endif
+
+#include "../urcu.h"
+
+/* Type of the shared object protected by "lock". */
+struct test_array {
+	int a;
+};
+
+/* Reader-writer lock guarding test_array. */
+pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
+
+/* The shared object: readers assert a == 8, writers store 8. */
+static struct test_array test_array = { 8 };
+
+/* Each reader performs OUTER_READ_LOOP * INNER_READ_LOOP reads. */
+#define OUTER_READ_LOOP	200U
+#define INNER_READ_LOOP	100000U
+#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
+
+/* Each writer performs OUTER_WRITE_LOOP * INNER_WRITE_LOOP updates. */
+#define OUTER_WRITE_LOOP 10U
+#define INNER_WRITE_LOOP 200U
+#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
+
+/* Thread counts parsed from argv[1] and argv[2]. */
+static int num_read;
+static int num_write;
+
+#define NR_READ num_read
+#define NR_WRITE num_write
+
+/* Per-thread cycle counts, indexed by thread index; allocated in main(). */
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+
+/*
+ * Reader thread body.
+ *
+ * arg carries the reader index (cast to a pointer by main()); the cycles
+ * spent in the whole read loop are stored in reader_time[index].
+ * Always returns (void *)1.
+ */
+void *thr_reader(void *arg)
+{
+	int i, j;
+	cycles_t time1, time2;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	/* Timed region: every lock / check / unlock cycle of this thread. */
+	time1 = get_cycles();
+	for (i = 0; i < OUTER_READ_LOOP; i++) {
+		for (j = 0; j < INNER_READ_LOOP; j++) {
+			pthread_rwlock_rdlock(&lock);
+			assert(test_array.a == 8);
+			pthread_rwlock_unlock(&lock);
+		}
+	}
+	time2 = get_cycles();
+
+	reader_time[(unsigned long)arg] = time2 - time1;
+
+	sleep(2);
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+/*
+ * Writer thread body.
+ *
+ * arg carries the writer index (cast to a pointer by main()). Each of the
+ * OUTER_WRITE_LOOP * INNER_WRITE_LOOP iterations stores 8 in test_array.a
+ * under the write lock; the cycles spent in all iterations are summed and
+ * stored in writer_time[index]. Always returns (void *)2.
+ */
+void *thr_writer(void *arg)
+{
+	int i, j;
+	cycles_t time1, time2;
+	/*
+	 * Accumulate locally and store once at the end: the writer_time[]
+	 * slot is not guaranteed to start at zero.
+	 */
+	cycles_t total = 0;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
+		for (j = 0; j < INNER_WRITE_LOOP; j++) {
+			time1 = get_cycles();
+			pthread_rwlock_wrlock(&lock);
+			test_array.a = 8;
+			pthread_rwlock_unlock(&lock);
+			time2 = get_cycles();
+			total += time2 - time1;
+			usleep(1);
+		}
+	}
+	writer_time[(unsigned long)arg] = total;
+
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	return ((void*)2);
+}
+
+/*
+ * Entry point: test_rwlock_timing nr_readers nr_writers
+ *
+ * Starts the requested reader and writer threads, waits for them and
+ * prints the average number of cycles per read and per write.
+ *
+ * Returns 0 on success; exits with status -1 on a usage error and with
+ * status 1 when a thread cannot be created or joined.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	int i;
+	cycles_t tot_rtime = 0;
+	cycles_t tot_wtime = 0;
+
+	/* Both argv[1] and argv[2] are read below. */
+	if (argc < 3) {
+		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
+		exit(-1);
+	}
+	num_read = atoi(argv[1]);
+	num_write = atoi(argv[2]);
+
+	/* Zeroed so per-thread times can be accumulated into the slots. */
+	reader_time = calloc(num_read, sizeof(*reader_time));
+	writer_time = calloc(num_write, sizeof(*writer_time));
+	tid_reader = malloc(sizeof(*tid_reader) * num_read);
+	tid_writer = malloc(sizeof(*tid_writer) * num_write);
+
+	printf("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	/* Each thread receives its index, cast to a pointer. */
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	sleep(10);
+
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_rtime += reader_time[i];
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_wtime += writer_time[i];
+	}
+	printf("Time per read : %g cycles\n",
+	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
+	printf("Time per write : %g cycles\n",
+	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
+
+	free(reader_time);
+	free(writer_time);
+	free(tid_reader);
+	free(tid_writer);
+
+	return 0;
+}
diff --git a/tests/test_urcu.c b/tests/test_urcu.c
new file mode 100644
index 0000000..18683bf
--- /dev/null
+++ b/tests/test_urcu.c
@@ -0,0 +1,430 @@
+/*
+ * test_urcu.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/*
+ * Provide gettid(), which returns the caller's thread id as a pid_t.
+ * Three variants are selected at compile time:
+ *  - through the _syscall0() macro when it is defined,
+ *  - otherwise through syscall(__NR_gettid) when that number is defined,
+ *  - otherwise fall back to getpid() and emit a compile-time warning.
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../urcu.h"
+
+/*
+ * Element published through test_rcu_pointer. Writers set "a" to 8 before
+ * publishing an element and readers assert that value.
+ */
+struct test_array {
+	int a;
+};
+
+static volatile int test_go, test_stop;
+
+static unsigned long wdelay;
+
+static struct test_array *test_rcu_pointer;
+
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+/* Busy-wait for l iterations, calling cpu_relax() once per iteration. */
+static inline void loop_sleep(unsigned long l)
+{
+	unsigned long remaining;
+
+	for (remaining = l; remaining != 0; remaining--)
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+static unsigned int cpu_affinities[NR_CPUS];
+static unsigned int next_aff = 0;
+static int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Pin the calling thread to the next CPU listed in cpu_affinities[].
+ *
+ * Does nothing when use_affinity is not set. The next_aff cursor is
+ * advanced under affinity_mutex so that concurrent callers each consume a
+ * different entry. Exits the process if the mutex cannot be taken or
+ * released; the result of sched_setaffinity() is not checked.
+ */
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	int cpu;
+	int ret;
+
+	if (!use_affinity)
+		return;
+
+	/*
+	 * pthread_mutex_*() report failure through their return value and do
+	 * not set errno, so the message is built with strerror(ret) instead
+	 * of perror().
+	 */
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	cpu = cpu_affinities[next_aff++];
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+/*
+ * Writer-side continuation check: yields 1 while the test is running and
+ * 0 once test_stop has been set.
+ */
+static int test_duration_write(void)
+{
+	return test_stop ? 0 : 1;
+}
+
+/*
+ * Reader-side continuation check: yields 1 while the test is running and
+ * 0 once test_stop has been set.
+ */
+static int test_duration_read(void)
+{
+	return test_stop ? 0 : 1;
+}
+
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+/* Mutex taken by rcu_copy_mutex_lock() / released by rcu_copy_mutex_unlock(). */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Acquire rcu_copy_mutex; exits the process with status -1 on failure.
+ *
+ * pthread_mutex_lock() returns the error number and does not set errno,
+ * so the diagnostic uses strerror(ret) rather than perror().
+ */
+void rcu_copy_mutex_lock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Release rcu_copy_mutex; exits the process with status -1 on failure.
+ *
+ * As above, the error number comes from the return value, not errno.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_unlock(&rcu_copy_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * malloc/free are reusing memory areas too quickly, which does not let us
+ * test races appropriately. Use a large circular array for allocations.
+ * ARRAY_SIZE is larger than nr_writers, which ensures we never run over our tail.
+ */
+#define ARRAY_SIZE (1048576 * nr_writers)
+#define ARRAY_POISON 0xDEADBEEF
+static int array_index;
+static struct test_array *test_array;
+
+/*
+ * Hand out the next slot of the circular test_array[] pool.
+ *
+ * The cursor (array_index) is read and advanced under rcu_copy_mutex. The
+ * chosen slot is asserted to be either never used (0) or previously
+ * released (ARRAY_POISON).
+ */
+static struct test_array *test_array_alloc(void)
+{
+	struct test_array *slot;
+	int index;
+
+	rcu_copy_mutex_lock();
+	index = array_index % ARRAY_SIZE;
+	assert(test_array[index].a == ARRAY_POISON ||
+		test_array[index].a == 0);
+	slot = &test_array[index];
+	/* Advance the cursor, wrapping at the end of the pool. */
+	if (++array_index == ARRAY_SIZE)
+		array_index = 0;
+	rcu_copy_mutex_unlock();
+
+	return slot;
+}
+
+/*
+ * Release a pool element by stamping it with ARRAY_POISON under
+ * rcu_copy_mutex. A NULL pointer is accepted and ignored.
+ */
+static void test_array_free(struct test_array *ptr)
+{
+	if (ptr) {
+		rcu_copy_mutex_lock();
+		ptr->a = ARRAY_POISON;
+		rcu_copy_mutex_unlock();
+	}
+}
+
+/*
+ * Reader thread body.
+ *
+ * _count points to an unsigned long long owned by the caller; the number
+ * of read-side critical sections this thread completed is stored there
+ * just before returning. Always returns (void *)1.
+ */
+void *thr_reader(void *_count)
+{
+	unsigned long long *count = _count;
+	struct test_array *local_ptr;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	rcu_register_thread();
+
+	/* Spin until main() sets test_go. */
+	while (!test_go)
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+		rcu_read_lock();
+		local_ptr = rcu_dereference(test_rcu_pointer);
+		debug_yield_read();
+		/*
+		 * Writers store 8 before publishing an element and overwrite
+		 * it (0, then ARRAY_POISON) once they get it back, so any
+		 * other value seen here means the element was reclaimed while
+		 * still being read.
+		 */
+		if (local_ptr)
+			assert(local_ptr->a == 8);
+		/* Optionally lengthen the critical section by rduration loops. */
+		if (unlikely(rduration))
+			loop_sleep(rduration);
+		rcu_read_unlock();
+		nr_reads++;
+		if (unlikely(!test_duration_read()))
+			break;
+	}
+
+	rcu_unregister_thread();
+
+	/* Hand the per-thread counter back to the caller. */
+	*count = nr_reads;
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+/*
+ * Writer thread body.
+ *
+ * _count points to an unsigned long long owned by the caller; the number
+ * of updates this thread performed is stored there just before returning.
+ * Always returns (void *)2.
+ */
+void *thr_writer(void *_count)
+{
+	unsigned long long *count = _count;
+	struct test_array *new, *old;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	/* Spin until main() sets test_go. */
+	while (!test_go)
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+		new = test_array_alloc();
+		new->a = 8;
+		/*
+		 * NOTE(review): presumably rcu_publish_content() swaps in the
+		 * new pointer and waits for a grace period before returning
+		 * the previous one - confirm against urcu.h.
+		 */
+		old = rcu_publish_content(&test_rcu_pointer, new);
+		/* Invalidate the old element so a late reader's assert fires. */
+		if (old)
+			old->a = 0;
+		test_array_free(old);
+		nr_writes++;
+		if (unlikely(!test_duration_write()))
+			break;
+		/* Optional pause between updates, in loop_sleep() iterations. */
+		if (unlikely(wdelay))
+			loop_sleep(wdelay);
+	}
+
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	/* Hand the per-thread counter back to the caller. */
+	*count = nr_writes;
+	return ((void*)2);
+}
+
+/*
+ * Print the command-line synopsis on stdout.
+ *
+ * argc is unused; argv[0] is printed as the program name. The -r/-w
+ * options are only listed when built with DEBUG_YIELD.
+ */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	/* The -d value is passed to loop_sleep(): it counts loops, not us. */
+	printf(" [-d delay] (writer period (in loops))");
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)");
+	printf("\n");
+}
+
+/*
+ * Entry point of the RCU stress test.
+ *
+ * Usage: <prog> nr_readers nr_writers duration [options] (see show_usage()).
+ *
+ * Parses the command line, starts the reader and writer threads, lets them
+ * run for "duration" seconds, then joins them and prints a SUMMARY line
+ * with the accumulated read and write counts. Returns -1 on a usage error
+ * and exits with status 1 on a pthread failure.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long *count_reader, *count_writer;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+	int i, a;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	/* Optional arguments; anything not starting with '-' is skipped. */
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	/*
+	 * The pool must start zeroed: test_array_alloc() asserts that every
+	 * slot it hands out holds either 0 or ARRAY_POISON.
+	 */
+	test_array = calloc(ARRAY_SIZE, sizeof(*test_array));
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	count_reader = malloc(sizeof(*count_reader) * nr_readers);
+	count_writer = malloc(sizeof(*count_writer) * nr_writers);
+
+	/* Rewind the cursor so threads consume the affinities in order. */
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     &count_reader[i]);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     &count_writer[i]);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+
+	/* Release the threads spinning on test_go. */
+	test_go = 1;
+
+	sleep(duration);
+
+	test_stop = 1;
+
+	/* A thread's counter is only read after that thread is joined. */
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += count_reader[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += count_writer[i];
+	}
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes);
+	test_array_free(test_rcu_pointer);
+	free(test_array);
+	free(tid_reader);
+	free(tid_writer);
+	free(count_reader);
+	free(count_writer);
+	return 0;
+}
diff --git a/tests/test_urcu_gc.c b/tests/test_urcu_gc.c
new file mode 100644
index 0000000..213c68b
--- /dev/null
+++ b/tests/test_urcu_gc.c
@@ -0,0 +1,462 @@
+/*
+ * test_urcu_gc.c
+ *
+ * Userspace RCU library - test program (with batch reclamation)
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sched.h>
+
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/* hardcoded number of CPUs */
+#define NR_CPUS 16384
+
+/*
+ * Provide gettid(), which returns the caller's thread id as a pid_t.
+ * Three variants are selected at compile time:
+ *  - through the _syscall0() macro when it is defined,
+ *  - otherwise through syscall(__NR_gettid) when that number is defined,
+ *  - otherwise fall back to getpid() and emit a compile-time warning.
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#ifndef DYNAMIC_LINK_TEST
+#define _LGPL_SOURCE
+#else
+#define debug_yield_read()
+#endif
+#include "../urcu.h"
+
+/*
+ * Element published through test_rcu_pointer. Writers set "a" to 8 before
+ * publishing an element and readers assert that value.
+ */
+struct test_array {
+	int a;
+};
+
+static volatile int test_go, test_stop;
+
+static unsigned long wdelay;
+
+static struct test_array *test_rcu_pointer;
+
+static unsigned int reclaim_batch = 1;
+
+/*
+ * Queue of pointers waiting to be freed. One instance per writer thread is
+ * stored in pending_reclaims[]; entries in [queue, head) are pending.
+ */
+struct reclaim_queue {
+	void **queue;	/* Beginning of queue */
+	void **head;	/* Insert position */
+};
+
+static struct reclaim_queue *pending_reclaims;
+
+static unsigned long duration;
+
+/* read-side C.S. duration, in loops */
+static unsigned long rduration;
+
+/* Busy-wait for l iterations, calling cpu_relax() once per iteration. */
+static inline void loop_sleep(unsigned long l)
+{
+	unsigned long remaining;
+
+	for (remaining = l; remaining != 0; remaining--)
+		cpu_relax();
+}
+
+static int verbose_mode;
+
+#define printf_verbose(fmt, args...)		\
+	do {					\
+		if (verbose_mode)		\
+			printf(fmt, args);	\
+	} while (0)
+
+static unsigned int cpu_affinities[NR_CPUS];
+static unsigned int next_aff = 0;
+static int use_affinity = 0;
+
+pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Pin the calling thread to the next CPU listed in cpu_affinities[].
+ *
+ * Does nothing when use_affinity is not set. The next_aff cursor is
+ * advanced under affinity_mutex so that concurrent callers each consume a
+ * different entry. Exits the process if the mutex cannot be taken or
+ * released; the result of sched_setaffinity() is not checked.
+ */
+static void set_affinity(void)
+{
+	cpu_set_t mask;
+	int cpu;
+	int ret;
+
+	if (!use_affinity)
+		return;
+
+	/*
+	 * pthread_mutex_*() report failure through their return value and do
+	 * not set errno, so the message is built with strerror(ret) instead
+	 * of perror().
+	 */
+	ret = pthread_mutex_lock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	cpu = cpu_affinities[next_aff++];
+	ret = pthread_mutex_unlock(&affinity_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+/*
+ * Writer-side continuation check: yields 1 while the test is running and
+ * 0 once test_stop has been set.
+ */
+static int test_duration_write(void)
+{
+	return test_stop ? 0 : 1;
+}
+
+/*
+ * Reader-side continuation check: yields 1 while the test is running and
+ * 0 once test_stop has been set.
+ */
+static int test_duration_read(void)
+{
+	return test_stop ? 0 : 1;
+}
+
+static unsigned long long __thread nr_writes;
+static unsigned long long __thread nr_reads;
+
+static
+unsigned long long __attribute__((aligned(CACHE_LINE_SIZE))) *tot_nr_writes;
+
+static unsigned int nr_readers;
+static unsigned int nr_writers;
+
+/* Mutex taken by rcu_copy_mutex_lock() / released by rcu_copy_mutex_unlock(). */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Acquire rcu_copy_mutex; exits the process with status -1 on failure.
+ *
+ * pthread_mutex_lock() returns the error number and does not set errno,
+ * so the diagnostic uses strerror(ret) rather than perror().
+ */
+void rcu_copy_mutex_lock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Release rcu_copy_mutex; exits the process with status -1 on failure.
+ *
+ * As above, the error number comes from the return value, not errno.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_unlock(&rcu_copy_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Reader thread body.
+ *
+ * _count points to an unsigned long long owned by the caller; the number
+ * of read-side critical sections this thread completed is stored there
+ * just before returning. Always returns (void *)1.
+ */
+void *thr_reader(void *_count)
+{
+	unsigned long long *count = _count;
+	struct test_array *local_ptr;
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	rcu_register_thread();
+
+	/* Spin until main() sets test_go. */
+	while (!test_go)
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+		rcu_read_lock();
+		local_ptr = rcu_dereference(test_rcu_pointer);
+		debug_yield_read();
+		/*
+		 * Writers store 8 before publishing an element and
+		 * rcu_gc_clear_queue() zeroes it right before free(), so any
+		 * other value seen here means the element was reclaimed while
+		 * still being read.
+		 */
+		if (local_ptr)
+			assert(local_ptr->a == 8);
+		/* Optionally lengthen the critical section by rduration loops. */
+		if (unlikely(rduration))
+			loop_sleep(rduration);
+		rcu_read_unlock();
+		nr_reads++;
+		if (unlikely(!test_duration_read()))
+			break;
+	}
+
+	rcu_unregister_thread();
+
+	/* Hand the per-thread counter back to the caller. */
+	*count = nr_reads;
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+/*
+ * Free every pointer queued in pending_reclaims[wtidx] and reset that
+ * queue to empty. synchronize_rcu() is called first, before any queued
+ * element is touched.
+ */
+static void rcu_gc_clear_queue(unsigned long wtidx)
+{
+	struct reclaim_queue *rq = &pending_reclaims[wtidx];
+	void **slot;
+
+	/* Wait for Q.S and empty queue */
+	synchronize_rcu();
+
+	slot = rq->queue;
+	while (slot < rq->head) {
+		struct test_array *entry = *slot;
+
+		/* poison (NULL entries are allowed and only passed to free) */
+		if (entry)
+			entry->a = 0;
+		free(entry);
+		slot++;
+	}
+	rq->head = rq->queue;
+}
+
+/*
+ * Append "old" to the reclaim queue of writer wtidx (one queue per writer
+ * thread). Once the queue holds at least reclaim_batch entries it is
+ * flushed with rcu_gc_clear_queue().
+ */
+static void rcu_gc_reclaim(unsigned long wtidx, void *old)
+{
+	struct reclaim_queue *rq = &pending_reclaims[wtidx];
+
+	/* Store the pointer at the insert position, then advance it. */
+	*rq->head++ = old;
+
+	if (unlikely(rq->head - rq->queue >= reclaim_batch))
+		rcu_gc_clear_queue(wtidx);
+}
+
+/*
+ * Writer thread body.
+ *
+ * data carries the writer index (cast from an integer); it selects this
+ * thread's slot in pending_reclaims[] and tot_nr_writes[]. The number of
+ * updates performed is stored in tot_nr_writes[wtidx] before returning.
+ * Always returns (void *)2.
+ *
+ * When built with TEST_LOCAL_GC no element is allocated or published:
+ * "old" stays NULL and only the reclaim queue handling is exercised.
+ */
+void *thr_writer(void *data)
+{
+	unsigned long wtidx = (unsigned long)data;
+#ifdef TEST_LOCAL_GC
+	struct test_array *old = NULL;
+#else
+	struct test_array *new, *old;
+#endif
+
+	printf_verbose("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+
+	set_affinity();
+
+	/* Spin until main() sets test_go. */
+	while (!test_go)
+	{
+	}
+	smp_mb();
+
+	for (;;) {
+#ifndef TEST_LOCAL_GC
+		/* NOTE(review): the malloc() result is used unchecked. */
+		new = malloc(sizeof(*new));
+		new->a = 8;
+		old = rcu_xchg_pointer(&test_rcu_pointer, new);
+#endif
+		/* Defer freeing the previous element to the batch queue. */
+		rcu_gc_reclaim(wtidx, old);
+		nr_writes++;
+		if (unlikely(!test_duration_write()))
+			break;
+		/* Optional pause between updates, in loop_sleep() iterations. */
+		if (unlikely(wdelay))
+			loop_sleep(wdelay);
+	}
+
+	printf_verbose("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	tot_nr_writes[wtidx] = nr_writes;
+	return ((void*)2);
+}
+
+/*
+ * Print the command-line synopsis on stdout.
+ *
+ * argc is unused; argv[0] is printed as the program name. The -r/-w
+ * options are only listed when built with DEBUG_YIELD.
+ */
+void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s nr_readers nr_writers duration (s)", argv[0]);
+#ifdef DEBUG_YIELD
+	printf(" [-r] [-w] (yield reader and/or writer)");
+#endif
+	/* -b is parsed by main() into reclaim_batch. */
+	printf(" [-b batch] (reclamation batch size)");
+	/* The -d value is passed to loop_sleep(): it counts loops, not us. */
+	printf(" [-d delay] (writer period (in loops))");
+	printf(" [-c duration] (reader C.S. duration (in loops))");
+	printf(" [-v] (verbose output)");
+	printf(" [-a cpu#] [-a cpu#]... (affinity)");
+	printf("\n");
+}
+
+/*
+ * Entry point of the RCU stress test with batched reclamation.
+ *
+ * Usage: <prog> nr_readers nr_writers duration [options] (see show_usage()).
+ *
+ * Parses the command line, sets up one reclaim queue per writer, starts
+ * the reader and writer threads, lets them run for "duration" seconds,
+ * then joins them, flushes the reclaim queues and prints a SUMMARY line.
+ * Returns -1 on a usage error and exits with status 1 on a pthread failure.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	unsigned long long *count_reader;
+	unsigned long long tot_reads = 0, tot_writes = 0;
+	int i, a;
+
+	if (argc < 4) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[1], "%u", &nr_readers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[2], "%u", &nr_writers);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	err = sscanf(argv[3], "%lu", &duration);
+	if (err != 1) {
+		show_usage(argc, argv);
+		return -1;
+	}
+
+	/* Optional arguments; anything not starting with '-' is skipped. */
+	for (i = 4; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+#ifdef DEBUG_YIELD
+		case 'r':
+			yield_active |= YIELD_READ;
+			break;
+		case 'w':
+			yield_active |= YIELD_WRITE;
+			break;
+#endif
+		case 'a':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			a = atoi(argv[++i]);
+			cpu_affinities[next_aff++] = a;
+			use_affinity = 1;
+			printf_verbose("Adding CPU %d affinity\n", a);
+			break;
+		case 'b':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			reclaim_batch = atol(argv[++i]);
+			break;
+		case 'c':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			rduration = atol(argv[++i]);
+			break;
+		case 'd':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				return -1;
+			}
+			wdelay = atol(argv[++i]);
+			break;
+		case 'v':
+			verbose_mode = 1;
+			break;
+		}
+	}
+
+	printf_verbose("running test for %lu seconds, %u readers, %u writers.\n",
+		duration, nr_readers, nr_writers);
+	printf_verbose("Writer delay : %lu loops.\n", wdelay);
+	printf_verbose("Reader duration : %lu loops.\n", rduration);
+	printf_verbose("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	tid_reader = malloc(sizeof(*tid_reader) * nr_readers);
+	tid_writer = malloc(sizeof(*tid_writer) * nr_writers);
+	count_reader = malloc(sizeof(*count_reader) * nr_readers);
+	tot_nr_writes = malloc(sizeof(*tot_nr_writes) * nr_writers);
+	pending_reclaims = malloc(sizeof(*pending_reclaims) * nr_writers);
+	/*
+	 * Each writer gets its own zeroed queue of reclaim_batch pointers,
+	 * but never smaller than CACHE_LINE_SIZE bytes. (The "i" inside
+	 * sizeof is not evaluated, so its value does not matter here.)
+	 */
+	if (reclaim_batch * sizeof(*pending_reclaims[i].queue)
+			< CACHE_LINE_SIZE)
+		for (i = 0; i < nr_writers; i++)
+			pending_reclaims[i].queue = calloc(1, CACHE_LINE_SIZE);
+	else
+		for (i = 0; i < nr_writers; i++)
+			pending_reclaims[i].queue = calloc(reclaim_batch,
+					sizeof(*pending_reclaims[i].queue));
+	for (i = 0; i < nr_writers; i++)
+		pending_reclaims[i].head = pending_reclaims[i].queue;
+
+	/* Rewind the cursor so threads consume the affinities in order. */
+	next_aff = 0;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     &count_reader[i]);
+		if (err != 0)
+			exit(1);
+	}
+	/* Writers receive their index by value through the void * argument. */
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	smp_mb();
+
+	/* Release the threads spinning on test_go. */
+	test_go = 1;
+
+	sleep(duration);
+
+	test_stop = 1;
+
+	for (i = 0; i < nr_readers; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_reads += count_reader[i];
+	}
+	for (i = 0; i < nr_writers; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_writes += tot_nr_writes[i];
+		/* Free whatever this writer left queued in a partial batch. */
+		rcu_gc_clear_queue(i);
+	}
+
+	/*
+	 * The element still installed in test_rcu_pointer was never handed to
+	 * rcu_gc_reclaim() (only replaced elements are queued), so free it
+	 * here. All reader and writer threads have been joined at this point.
+	 * The pointer is NULL when no writer ever published anything.
+	 */
+	free(test_rcu_pointer);
+	test_rcu_pointer = NULL;
+
+	printf_verbose("total number of reads : %llu, writes %llu\n", tot_reads,
+	       tot_writes);
+	printf("SUMMARY %-25s testdur %4lu nr_readers %3u rdur %6lu "
+		"nr_writers %3u "
+		"wdelay %6lu nr_reads %12llu nr_writes %12llu nr_ops %12llu "
+		"batch %u\n",
+		argv[0], duration, nr_readers, rduration,
+		nr_writers, wdelay, tot_reads, tot_writes,
+		tot_reads + tot_writes, reclaim_batch);
+	free(tid_reader);
+	free(tid_writer);
+	free(count_reader);
+	free(tot_nr_writes);
+	for (i = 0; i < nr_writers; i++)
+		free(pending_reclaims[i].queue);
+	free(pending_reclaims);
+
+	return 0;
+}
diff --git a/tests/test_urcu_timing.c b/tests/test_urcu_timing.c
new file mode 100644
index 0000000..a3ca783
--- /dev/null
+++ b/tests/test_urcu_timing.c
@@ -0,0 +1,239 @@
+/*
+ * test_urcu_timing.c
+ *
+ * Userspace RCU library - test program
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include "../arch.h"
+
+/* Make this big enough to include the POWER5+ L3 cacheline size of 256B */
+#define CACHE_LINE_SIZE 4096
+
+/*
+ * Provide gettid(), which returns the caller's thread id as a pid_t.
+ * Three variants are selected at compile time:
+ *  - through the _syscall0() macro when it is defined,
+ *  - otherwise through syscall(__NR_gettid) when that number is defined,
+ *  - otherwise fall back to getpid() and emit a compile-time warning.
+ */
+#if defined(_syscall0)
+_syscall0(pid_t, gettid)
+#elif defined(__NR_gettid)
+static inline pid_t gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+#else
+#warning "use pid as tid"
+static inline pid_t gettid(void)
+{
+	return getpid();
+}
+#endif
+
+#define _LGPL_SOURCE
+#include "../urcu.h"
+
+/* Mutex taken by rcu_copy_mutex_lock() / released by rcu_copy_mutex_unlock(). */
+pthread_mutex_t rcu_copy_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Acquire rcu_copy_mutex; exits the process with status -1 on failure.
+ *
+ * pthread_mutex_lock() returns the error number and does not set errno,
+ * so the diagnostic uses strerror(ret) rather than perror().
+ */
+void rcu_copy_mutex_lock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_lock(&rcu_copy_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex lock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Release rcu_copy_mutex; exits the process with status -1 on failure.
+ *
+ * As above, the error number comes from the return value, not errno.
+ */
+void rcu_copy_mutex_unlock(void)
+{
+	int ret;
+
+	ret = pthread_mutex_unlock(&rcu_copy_mutex);
+	if (ret) {
+		fprintf(stderr, "Error in pthread mutex unlock: %s\n",
+			strerror(ret));
+		exit(-1);
+	}
+}
+
+/*
+ * Element published through test_rcu_pointer. Writers set "a" to 8 before
+ * publishing an element and readers assert that value.
+ */
+struct test_array {
+	int a;
+};
+
+static struct test_array *test_rcu_pointer;
+
+#define OUTER_READ_LOOP	2000U
+#define INNER_READ_LOOP	100000U
+#define READ_LOOP ((unsigned long long)OUTER_READ_LOOP * INNER_READ_LOOP)
+
+#define OUTER_WRITE_LOOP 10U
+#define INNER_WRITE_LOOP 200U
+#define WRITE_LOOP ((unsigned long long)OUTER_WRITE_LOOP * INNER_WRITE_LOOP)
+
+static int num_read;
+static int num_write;
+
+#define NR_READ num_read
+#define NR_WRITE num_write
+
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *reader_time;
+static cycles_t __attribute__((aligned(CACHE_LINE_SIZE))) *writer_time;
+
+/*
+ * Reader thread body.
+ *
+ * arg carries the reader index (cast from an integer). The thread runs
+ * OUTER_READ_LOOP * INNER_READ_LOOP read-side critical sections and stores
+ * the elapsed cycle count in reader_time[index]. Always returns (void *)1.
+ */
+void *thr_reader(void *arg)
+{
+	int i, j;
+	struct test_array *local_ptr;
+	cycles_t time1, time2;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	rcu_register_thread();
+
+	/* Only the read loops themselves are timed. */
+	time1 = get_cycles();
+	for (i = 0; i < OUTER_READ_LOOP; i++) {
+		for (j = 0; j < INNER_READ_LOOP; j++) {
+			rcu_read_lock();
+			local_ptr = rcu_dereference(test_rcu_pointer);
+			/* Writers store 8 before publishing, 0 before free(). */
+			if (local_ptr) {
+				assert(local_ptr->a == 8);
+			}
+			rcu_read_unlock();
+		}
+	}
+	time2 = get_cycles();
+
+	rcu_unregister_thread();
+
+	reader_time[(unsigned long)arg] = time2 - time1;
+
+	sleep(2);
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"reader", pthread_self(), (unsigned long)gettid());
+	return ((void*)1);
+
+}
+
+/*
+ * Writer thread body.
+ *
+ * arg carries the writer index (cast from an integer). The thread performs
+ * OUTER_WRITE_LOOP * INNER_WRITE_LOOP updates and adds the cycles spent in
+ * each one to writer_time[index]; that slot is accumulated with "+=", so it
+ * is expected to start at zero. Always returns (void *)2.
+ */
+void *thr_writer(void *arg)
+{
+	int i, j;
+	struct test_array *new, *old;
+	cycles_t time1, time2;
+
+	printf("thread_begin %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	sleep(2);
+
+	for (i = 0; i < OUTER_WRITE_LOOP; i++) {
+		for (j = 0; j < INNER_WRITE_LOOP; j++) {
+			time1 = get_cycles();
+			/* NOTE(review): the malloc() result is used unchecked. */
+			new = malloc(sizeof(struct test_array));
+			rcu_copy_mutex_lock();
+			old = test_rcu_pointer;
+			if (old) {
+				assert(old->a == 8);
+			}
+			new->a = 8;
+			/*
+			 * NOTE(review): presumably rcu_publish_content() waits
+			 * for a grace period before returning the previous
+			 * pointer - confirm against urcu.h.
+			 */
+			old = rcu_publish_content(&test_rcu_pointer, new);
+			rcu_copy_mutex_unlock();
+			/* can be done after unlock */
+			if (old) {
+				old->a = 0;
+			}
+			free(old);
+			time2 = get_cycles();
+			writer_time[(unsigned long)arg] += time2 - time1;
+			/* The pause between updates is outside the timed span. */
+			usleep(1);
+		}
+	}
+
+	printf("thread_end %s, thread id : %lx, tid %lu\n",
+			"writer", pthread_self(), (unsigned long)gettid());
+	return ((void*)2);
+}
+
+/*
+ * Entry point of the RCU timing test.
+ *
+ * Usage: <prog> nr_readers nr_writers
+ *
+ * Spawns nr_readers reader threads and nr_writers writer threads, joins
+ * them, then prints the average cost of one read and one write in cycles.
+ * Exits with a non-zero status on a usage error or a pthread failure.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	pthread_t *tid_reader, *tid_writer;
+	void *tret;
+	int i;
+	cycles_t tot_rtime = 0;
+	cycles_t tot_wtime = 0;
+
+	/* Both argv[1] and argv[2] are read below: require two arguments. */
+	if (argc < 3) {
+		printf("Usage : %s nr_readers nr_writers\n", argv[0]);
+		exit(-1);
+	}
+	num_read = atoi(argv[1]);
+	num_write = atoi(argv[2]);
+
+	/*
+	 * The per-thread timing slots must start at zero: thr_writer()
+	 * accumulates into writer_time[] with "+=".
+	 */
+	reader_time = calloc(num_read, sizeof(*reader_time));
+	writer_time = calloc(num_write, sizeof(*writer_time));
+	tid_reader = malloc(sizeof(*tid_reader) * num_read);
+	tid_writer = malloc(sizeof(*tid_writer) * num_write);
+
+	printf("thread %-6s, thread id : %lx, tid %lu\n",
+			"main", pthread_self(), (unsigned long)gettid());
+
+	/* The thread index is passed by value through the void * argument. */
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_create(&tid_reader[i], NULL, thr_reader,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_create(&tid_writer[i], NULL, thr_writer,
+				     (void *)(long)i);
+		if (err != 0)
+			exit(1);
+	}
+
+	sleep(10);
+
+	/* A thread's timing slot is only read after that thread is joined. */
+	for (i = 0; i < NR_READ; i++) {
+		err = pthread_join(tid_reader[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_rtime += reader_time[i];
+	}
+	for (i = 0; i < NR_WRITE; i++) {
+		err = pthread_join(tid_writer[i], &tret);
+		if (err != 0)
+			exit(1);
+		tot_wtime += writer_time[i];
+	}
+	/* Release the last element published by the writers (may be NULL). */
+	free(test_rcu_pointer);
+	printf("Time per read : %g cycles\n",
+	       (double)tot_rtime / ((double)NR_READ * (double)READ_LOOP));
+	printf("Time per write : %g cycles\n",
+	       (double)tot_wtime / ((double)NR_WRITE * (double)WRITE_LOOP));
+
+	free(reader_time);
+	free(writer_time);
+	free(tid_reader);
+	free(tid_writer);
+
+	return 0;
+}
diff --git a/tests/urcu-asm.c b/tests/urcu-asm.c
new file mode 100644
index 0000000..66be709
--- /dev/null
+++ b/tests/urcu-asm.c
@@ -0,0 +1,37 @@
+/*
+ * urcu-asm.c
+ *
+ * Userspace RCU library - assembly dump of primitives
+ *
+ * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "../urcu.h"
+
+/*
+ * Wrap rcu_read_lock() between two asm comment markers so that the code
+ * generated for it can be located between the start and end markers in the
+ * compiler's assembly output.
+ */
+void show_read_lock(void)
+{
+	asm volatile ("/* start */");
+	rcu_read_lock();
+	asm volatile ("/* end */");
+}
+
+/*
+ * Wrap rcu_read_unlock() between two asm comment markers so that the code
+ * generated for it can be located between the start and end markers in the
+ * compiler's assembly output.
+ */
+void show_read_unlock(void)
+{
+	asm volatile ("/* start */");
+	rcu_read_unlock();
+	asm volatile ("/* end */");
+}
diff --git a/tests/urcutorture.c b/tests/urcutorture.c
new file mode 100644
index 0000000..70abc68
--- /dev/null
+++ b/tests/urcutorture.c
@@ -0,0 +1,9 @@
+#include <string.h>
+#include <sys/time.h>
+#include <poll.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include "api.h"
+#define _LGPL_SOURCE
+#include "../urcu.h"
+#include "rcutorture.h"
diff --git a/urcu-asm.c b/urcu-asm.c
deleted file mode 100644
index 0e833f4..0000000
--- a/urcu-asm.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * urcu-asm.c
- *
- * Userspace RCU library - assembly dump of primitives
- *
- * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "urcu.h"
-
-void show_read_lock(void)
-{
-	asm volatile ("/* start */");
-	rcu_read_lock();
-	asm volatile ("/* end */");
-}
-
-void show_read_unlock(void)
-{
-	asm volatile ("/* start */");
-	rcu_read_unlock();
-	asm volatile ("/* end */");
-}
diff --git a/urcutorture.c b/urcutorture.c
deleted file mode 100644
index 75256f9..0000000
--- a/urcutorture.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <string.h>
-#include <sys/time.h>
-#include <poll.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include "api.h"
-#define _LGPL_SOURCE
-#include "urcu.h"
-#include "rcutorture.h"