From bf9de1b724767a7b0d9f32385ed3ab8623aabb71 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 19 Oct 2009 19:43:34 -0400 Subject: [PATCH] uatomic compat: complete i386 support Signed-off-by: Mathieu Desnoyers --- compat_arch_x86.c | 158 ++++++++++++++++++++++++++++++++++------ configure.ac | 16 +++- tests/Makefile.am | 2 +- urcu-pointer-static.h | 7 +- urcu-pointer.c | 8 +- urcu/config.h.in | 3 + urcu/uatomic_arch_x86.h | 129 ++++++++++++++++++++++---------- 7 files changed, 245 insertions(+), 78 deletions(-) diff --git a/compat_arch_x86.c b/compat_arch_x86.c index e08ac89..fc504c3 100644 --- a/compat_arch_x86.c +++ b/compat_arch_x86.c @@ -33,8 +33,6 @@ */ int __attribute__((constructor)) __urcu_cas_init(void); -static pthread_mutex_t compat_mutex = PTHREAD_MUTEX_INITIALIZER; - /* * -1: unknown * 1: available @@ -42,23 +40,26 @@ static pthread_mutex_t compat_mutex = PTHREAD_MUTEX_INITIALIZER; */ int __urcu_cas_avail = -1; +static pthread_mutex_t compat_mutex = PTHREAD_MUTEX_INITIALIZER; + /* - * Imported from glibc 2.3.5. linuxthreads/sysdeps/i386/pt-machine.h. + * get_eflags/set_eflags/compare_and_swap_is_available imported from glibc + * 2.3.5. linuxthreads/sysdeps/i386/pt-machine.h. 
*/ -int get_eflags (void) +static int get_eflags (void) { int res; __asm__ __volatile__ ("pushfl; popl %0" : "=r" (res) : ); return res; } -void set_eflags (int newflags) +static void set_eflags (int newflags) { __asm__ __volatile__ ("pushl %0; popfl" : : "r" (newflags) : "cc"); } -int compare_and_swap_is_available (void) +static int compare_and_swap_is_available (void) { int oldflags = get_eflags (); int changed; @@ -73,52 +74,161 @@ int compare_and_swap_is_available (void) return changed != 0; } -unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old, - unsigned long _new, int len) +static void mutex_lock_signal_save(pthread_mutex_t *mutex, sigset_t *oldmask) { - sigset_t newmask, oldmask; + sigset_t newmask; int ret; /* Disable signals */ ret = sigemptyset(&newmask); assert(!ret); - ret = pthread_sigmask(SIG_SETMASK, &newmask, &oldmask); + ret = pthread_sigmask(SIG_SETMASK, &newmask, oldmask); assert(!ret); ret = pthread_mutex_lock(&compat_mutex); assert(!ret); +} + +static void mutex_lock_signal_restore(pthread_mutex_t *mutex, sigset_t *oldmask) +{ + int ret; + + ret = pthread_mutex_unlock(&compat_mutex); + assert(!ret); + ret = pthread_sigmask(SIG_SETMASK, oldmask, NULL); + assert(!ret); +} + +unsigned long _compat_uatomic_set(void *addr, unsigned long _new, int len) +{ + sigset_t mask; + unsigned long result; + + mutex_lock_signal_save(&compat_mutex, &mask); + switch (len) { + case 1: + *(unsigned char *)addr = (unsigned char)_new; + result = *(unsigned char *)addr; + break; + case 2: + *(unsigned short *)addr = (unsigned short)_new; + result = *(unsigned short *)addr; + break; + case 4: + *(unsigned int *)addr = (unsigned int)_new; + result = *(unsigned int *)addr; + break; + default: + /* + * generate an illegal instruction. Cannot catch this with + * linker tricks when optimizations are disabled. 
+ */ + __asm__ __volatile__("ud2"); + } + mutex_lock_signal_restore(&compat_mutex, &mask); + return _new; +} +unsigned long _compat_uatomic_xchg(void *addr, unsigned long _new, int len) +{ + sigset_t mask; + unsigned long retval; + + mutex_lock_signal_save(&compat_mutex, &mask); + switch (len) { + case 1: + retval = *(unsigned char *)addr; + *(unsigned char *)addr = (unsigned char)_new; + break; + case 2: + retval = *(unsigned short *)addr; + *(unsigned short *)addr = (unsigned short)_new; + break; + case 4: + retval = *(unsigned int *)addr; + *(unsigned int *)addr = (unsigned int)_new; + break; + default: + /* + * generate an illegal instruction. Cannot catch this with + * linker tricks when optimizations are disabled. + */ + __asm__ __volatile__("ud2"); + } + mutex_lock_signal_restore(&compat_mutex, &mask); + return retval; +} + +unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old, + unsigned long _new, int len) +{ + unsigned long retval; + sigset_t mask; + + mutex_lock_signal_save(&compat_mutex, &mask); switch (len) { case 1: { unsigned char result = *(unsigned char *)addr; - if (result == old) + if (result == (unsigned char)old) *(unsigned char *)addr = (unsigned char)_new; - return result; + retval = result; + break; } case 2: { unsigned short result = *(unsigned short *)addr; - if (result == old) + if (result == (unsigned short)old) *(unsigned short *)addr = (unsigned short)_new; - return result; + retval = result; + break; } case 4: { unsigned int result = *(unsigned int *)addr; - if (result == old) + if (result == (unsigned int)old) *(unsigned int *)addr = (unsigned int)_new; - return result; + retval = result; + break; } + default: + /* + * generate an illegal instruction. Cannot catch this with + * linker tricks when optimizations are disabled. + */ + __asm__ __volatile__("ud2"); } - /* generate an illegal instruction. Cannot catch this with linker tricks - * when optimizations are disabled. 
*/ - __asm__ __volatile__("ud2"); - return 0; + mutex_lock_signal_restore(&compat_mutex, &mask); + return retval; +} - ret = pthread_mutex_unlock(&compat_mutex); - assert(!ret); - ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); - assert(!ret); +unsigned long _compat_uatomic_add_return(void *addr, unsigned long v, int len) +{ + sigset_t mask; + unsigned long result; + + mutex_lock_signal_save(&compat_mutex, &mask); + switch (len) { + case 1: + *(unsigned char *)addr += (unsigned char)v; + result = *(unsigned char *)addr; + break; + case 2: + *(unsigned short *)addr += (unsigned short)v; + result = *(unsigned short *)addr; + break; + case 4: + *(unsigned int *)addr += (unsigned int)v; + result = *(unsigned int *)addr; + break; + default: + /* + * generate an illegal instruction. Cannot catch this with + * linker tricks when optimizations are disabled. + */ + __asm__ __volatile__("ud2"); + } + mutex_lock_signal_restore(&compat_mutex, &mask); + return result; } int __urcu_cas_init(void) diff --git a/configure.ac b/configure.ac index 3d772e3..9bb763d 100644 --- a/configure.ac +++ b/configure.ac @@ -16,6 +16,8 @@ AC_CONFIG_HEADERS([config.h urcu/config.h]) AH_TEMPLATE([CONFIG_URCU_SMP], [Enable SMP support. With SMP support enabled, uniprocessors are also supported. With SMP support disabled, UP systems work fine, but the behavior of SMP systems is undefined.]) AH_TEMPLATE([CONFIG_URCU_HAVE_FENCE], [Defined when on a system that has memory fence instructions.]) AH_TEMPLATE([CONFIG_URCU_HAVE_FUTEX], [Defined when on a system with futex support.]) +AH_TEMPLATE([CONFIG_URCU_COMPAT_ARCH], [Compatibility mode for i386 which lacks +cmpxchg instruction.]) # Checks for programs. 
AC_PROG_CC @@ -35,9 +37,9 @@ AC_CHECK_FUNCS([bzero gettimeofday munmap strtoul]) # Find arch type case $target_cpu in i386) ARCHTYPE="x86"; SUBARCHTYPE="x86compat" ;; - i486) ARCHTYPE="x86"; SUBARCHTYPE="x86compat" ;; - i586) ARCHTYPE="x86"; SUBARCHTYPE="x86compat" ;; - i686) ARCHTYPE="x86"; SUBARCHTYPE="x86compat" ;; + i486) ARCHTYPE="x86";; + i586) ARCHTYPE="x86";; + i686) ARCHTYPE="x86";; x86_64) ARCHTYPE="x86";; powerpc) ARCHTYPE="ppc" ;; ppc64) ARCHTYPE="ppc" ;; @@ -94,6 +96,14 @@ AM_CONDITIONAL([GCC_API], [test "x$ARCHTYPE" != xx86 -a "x$ARCHTYPE" != xppc]) AM_CONDITIONAL([COMPAT_ARCH], [test "x$SUBARCHTYPE" = xx86compat ]) +[ +if test "x$SUBARCHTYPE" = xx86compat; then +] + AC_DEFINE([CONFIG_URCU_COMPAT_ARCH], [1]) +[ +fi +] + AC_ARG_ENABLE([smp-support], [ --disable-smp-support Disable SMP support. Warning: only use this on uniprocessor systems. [[default=enabled]]], [def_smp_support=$enableval], [def_smp_support="yes"]) diff --git a/tests/Makefile.am b/tests/Makefile.am index 995c0bd..c39ad5b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -114,7 +114,7 @@ test_qsbr_dynamic_link_CFLAGS = -DDYNAMIC_LINK_TEST $(AM_CFLAGS) test_urcu_mb_defer_SOURCES = test_urcu_defer.c $(URCU_MB_DEFER) test_urcu_mb_defer_CFLAGS = -DURCU_MB $(AM_CFLAGS) -test_uatomic_SOURCES = test_uatomic.c +test_uatomic_SOURCES = test_uatomic.c $(COMPAT) test_urcu_assign_SOURCES = test_urcu_assign.c $(URCU_SIGNAL) diff --git a/urcu-pointer-static.h b/urcu-pointer-static.h index c0533c9..eb073af 100644 --- a/urcu-pointer-static.h +++ b/urcu-pointer-static.h @@ -78,10 +78,7 @@ if (!__builtin_constant_p(_new) || \ ((_new) != NULL)) \ wmb(); \ - (likely(URCU_CAS_AVAIL()) ? 
\ - (uatomic_cmpxchg(p, _________pold, _________pnew)) : \ - (compat_uatomic_cmpxchg(p, _________pold, \ - _________pnew))) \ + uatomic_cmpxchg(p, _________pold, _________pnew); \ }) /** @@ -106,7 +103,7 @@ if (!__builtin_constant_p(v) || \ ((v) != NULL)) \ wmb(); \ - STORE_SHARED(*(p), _________pv); \ + uatomic_set(p, _________pv); \ }) /** diff --git a/urcu-pointer.c b/urcu-pointer.c index 83f0ffd..1c5f6bd 100644 --- a/urcu-pointer.c +++ b/urcu-pointer.c @@ -40,7 +40,7 @@ void *rcu_dereference_sym(void *p) void *rcu_set_pointer_sym(void **p, void *v) { wmb(); - return STORE_SHARED(*p, v); + return uatomic_set(p, v); } void *rcu_xchg_pointer_sym(void **p, void *v) @@ -52,9 +52,5 @@ void *rcu_xchg_pointer_sym(void **p, void *v) void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new) { wmb(); - if (likely(URCU_CAS_AVAIL())) - return uatomic_cmpxchg(p, old, _new); - - /* Compatibility for i386. Old-timer. */ - return compat_uatomic_cmpxchg(p, old, _new); + return uatomic_cmpxchg(p, old, _new); } diff --git a/urcu/config.h.in b/urcu/config.h.in index d61d2a9..48024b6 100644 --- a/urcu/config.h.in +++ b/urcu/config.h.in @@ -10,3 +10,6 @@ supported. With SMP support disabled, UP systems work fine, but the behavior of SMP systems is undefined. */ #undef CONFIG_URCU_SMP + +/* Compatibility mode for i386 which lacks cmpxchg instruction. 
*/ +#undef CONFIG_URCU_COMPAT_ARCH diff --git a/urcu/uatomic_arch_x86.h b/urcu/uatomic_arch_x86.h index 39ec407..8e0f0a8 100644 --- a/urcu/uatomic_arch_x86.h +++ b/urcu/uatomic_arch_x86.h @@ -21,6 +21,7 @@ */ #include +#include #ifndef __SIZEOF_LONG__ #if defined(__x86_64__) || defined(__amd64__) @@ -43,17 +44,13 @@ struct __uatomic_dummy { }; #define __hp(x) ((struct __uatomic_dummy *)(x)) -#define uatomic_set(addr, v) \ -do { \ - ACCESS_ONCE(*(addr)) = (v); \ -} while (0) - -#define uatomic_read(addr) ACCESS_ONCE(*(addr)) +#define _uatomic_set(addr, v) STORE_SHARED(*(addr), (v)) +#define _uatomic_read(addr) LOAD_SHARED(*(addr)) /* cmpxchg */ static inline __attribute__((always_inline)) -unsigned long _uatomic_cmpxchg(void *addr, unsigned long old, +unsigned long __uatomic_cmpxchg(void *addr, unsigned long old, unsigned long _new, int len) { switch (len) { @@ -110,15 +107,15 @@ unsigned long _uatomic_cmpxchg(void *addr, unsigned long old, return 0; } -#define uatomic_cmpxchg(addr, old, _new) \ - ((__typeof__(*(addr))) _uatomic_cmpxchg((addr), (unsigned long)(old),\ - (unsigned long)(_new), \ +#define _uatomic_cmpxchg(addr, old, _new) \ + ((__typeof__(*(addr))) __uatomic_cmpxchg((addr), (unsigned long)(old),\ + (unsigned long)(_new), \ sizeof(*(addr)))) /* xchg */ static inline __attribute__((always_inline)) -unsigned long _uatomic_exchange(void *addr, unsigned long val, int len) +unsigned long __uatomic_exchange(void *addr, unsigned long val, int len) { /* Note: the "xchg" instruction does not need a "lock" prefix. 
*/ switch (len) { @@ -171,14 +168,14 @@ unsigned long _uatomic_exchange(void *addr, unsigned long val, int len) return 0; } -#define uatomic_xchg(addr, v) \ - ((__typeof__(*(addr))) _uatomic_exchange((addr), (unsigned long)(v), \ +#define _uatomic_xchg(addr, v) \ + ((__typeof__(*(addr))) __uatomic_exchange((addr), (unsigned long)(v), \ sizeof(*(addr)))) /* uatomic_add_return, uatomic_sub_return */ static inline __attribute__((always_inline)) -unsigned long _uatomic_add_return(void *addr, unsigned long val, +unsigned long __uatomic_add_return(void *addr, unsigned long val, int len) { switch (len) { @@ -235,17 +232,17 @@ unsigned long _uatomic_add_return(void *addr, unsigned long val, return 0; } -#define uatomic_add_return(addr, v) \ - ((__typeof__(*(addr))) _uatomic_add_return((addr), \ +#define _uatomic_add_return(addr, v) \ + ((__typeof__(*(addr))) __uatomic_add_return((addr), \ (unsigned long)(v), \ sizeof(*(addr)))) -#define uatomic_sub_return(addr, v) uatomic_add_return((addr), -(v)) +#define _uatomic_sub_return(addr, v) _uatomic_add_return((addr), -(v)) /* uatomic_add, uatomic_sub */ static inline __attribute__((always_inline)) -void _uatomic_add(void *addr, unsigned long val, int len) +void __uatomic_add(void *addr, unsigned long val, int len) { switch (len) { case 1: @@ -293,16 +290,16 @@ void _uatomic_add(void *addr, unsigned long val, int len) return; } -#define uatomic_add(addr, v) \ - (_uatomic_add((addr), (unsigned long)(v), sizeof(*(addr)))) +#define _uatomic_add(addr, v) \ + (__uatomic_add((addr), (unsigned long)(v), sizeof(*(addr)))) -#define uatomic_sub(addr, v) uatomic_add((addr), -(v)) +#define _uatomic_sub(addr, v) _uatomic_add((addr), -(v)) /* uatomic_inc */ static inline __attribute__((always_inline)) -void _uatomic_inc(void *addr, int len) +void __uatomic_inc(void *addr, int len) { switch (len) { case 1: @@ -350,12 +347,12 @@ void _uatomic_inc(void *addr, int len) return; } -#define uatomic_inc(addr) (_uatomic_inc((addr), sizeof(*(addr)))) 
+#define _uatomic_inc(addr) (__uatomic_inc((addr), sizeof(*(addr)))) /* uatomic_dec */ static inline __attribute__((always_inline)) -void _uatomic_dec(void *addr, int len) +void __uatomic_dec(void *addr, int len) { switch (len) { case 1: @@ -403,28 +400,82 @@ void _uatomic_dec(void *addr, int len) return; } -#define uatomic_dec(addr) (_uatomic_dec((addr), sizeof(*(addr)))) +#define _uatomic_dec(addr) (__uatomic_dec((addr), sizeof(*(addr)))) -#if (BITS_PER_LONG == 64) -#define URCU_CAS_AVAIL() 1 -#define compat_uatomic_cmpxchg(ptr, old, _new) uatomic_cmpxchg(ptr, old, _new) -#else +#if ((BITS_PER_LONG != 64) && defined(CONFIG_URCU_COMPAT_ARCH)) extern int __urcu_cas_avail; extern int __urcu_cas_init(void); -#define URCU_CAS_AVAIL() \ - ((likely(__urcu_cas_avail > 0)) ? \ - (1) : \ - ((unlikely(__urcu_cas_avail < 0) ? \ - (__urcu_cas_init()) : \ - (0)))) + +#define UATOMIC_COMPAT(insn) \ + ((likely(__urcu_cas_avail > 0)) \ + ? (_uatomic_##insn) \ + : ((unlikely(__urcu_cas_avail < 0) \ + ? ((__urcu_cas_init() > 0) \ + ? 
(_uatomic_##insn) \ + : (compat_uatomic_##insn)) \ + : (compat_uatomic_##insn)))) + +extern unsigned long _compat_uatomic_set(void *addr, + unsigned long _new, int len); +#define compat_uatomic_set(addr, _new) \ + ((__typeof__(*(addr))) _compat_uatomic_set((addr), \ + (unsigned long)(_new), \ + sizeof(*(addr)))) + + +extern unsigned long _compat_uatomic_xchg(void *addr, + unsigned long _new, int len); +#define compat_uatomic_xchg(addr, _new) \ + ((__typeof__(*(addr))) _compat_uatomic_xchg((addr), \ + (unsigned long)(_new), \ + sizeof(*(addr)))) extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old, - unsigned long _new, int len); + unsigned long _new, int len); +#define compat_uatomic_cmpxchg(addr, old, _new) \ + ((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr), \ + (unsigned long)(old), \ + (unsigned long)(_new), \ + sizeof(*(addr)))) -#define compat_uatomic_cmpxchg(addr, old, _new) \ - ((__typeof__(*(addr))) _uatomic_cmpxchg((addr), (unsigned long)(old),\ - (unsigned long)(_new), \ +extern unsigned long _compat_uatomic_add_return(void *addr, + unsigned long v, int len); +#define compat_uatomic_add_return(addr, v) \ + ((__typeof__(*(addr))) _compat_uatomic_add_return((addr), \ + (unsigned long)(v), \ + sizeof(*(addr)))) + +#define compat_uatomic_sub_return(addr, v) \ + compat_uatomic_add_return((addr), -(v)) +#define compat_uatomic_add(addr, v) \ + ((void)compat_uatomic_add_return((addr), (v))) +#define compat_uatomic_sub(addr, v) \ + ((void)compat_uatomic_sub_return((addr), (v))) +#define compat_uatomic_inc(addr) \ + (compat_uatomic_add((addr), 1)) +#define compat_uatomic_dec(addr) \ + (compat_uatomic_sub((addr), 1)) + +#else +#define UATOMIC_COMPAT(insn) (_uatomic_##insn) #endif +/* Read is atomic even in compat mode */ +#define uatomic_read(addr) _uatomic_read(addr) + +#define uatomic_set(addr, v) \ + UATOMIC_COMPAT(set(addr, v)) +#define uatomic_cmpxchg(addr, old, _new) \ + UATOMIC_COMPAT(cmpxchg(addr, old, _new)) +#define 
uatomic_xchg(addr, v) \ + UATOMIC_COMPAT(xchg(addr, v)) +#define uatomic_add_return(addr, v) \ + UATOMIC_COMPAT(add_return(addr, v)) +#define uatomic_sub_return(addr, v) \ + UATOMIC_COMPAT(sub_return(addr, v)) +#define uatomic_add(addr, v) UATOMIC_COMPAT(add(addr, v)) +#define uatomic_sub(addr, v) UATOMIC_COMPAT(sub(addr, v)) +#define uatomic_inc(addr) UATOMIC_COMPAT(inc(addr)) +#define uatomic_dec(addr) UATOMIC_COMPAT(dec(addr)) + #endif /* _URCU_ARCH_UATOMIC_X86_H */ -- 2.34.1