X-Git-Url: http://git.liburcu.org/?p=urcu.git;a=blobdiff_plain;f=urcu-static.h;h=18e4826a5a31789c7cb408832421cbddb7d83c22;hp=f819e6fc9aa07e9f4e58329661bf5a494936bb25;hb=acfd099eae9026369f558e73b2a7e35860962d9d;hpb=4d1ce26fea0296ff67791c881836393fdd020cf9

diff --git a/urcu-static.h b/urcu-static.h
index f819e6f..18e4826 100644
--- a/urcu-static.h
+++ b/urcu-static.h
@@ -9,7 +9,7 @@
  * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu.h for linking
  * dynamically with the userspace rcu library.
  *
- * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
  *
  * This library is free software; you can redistribute it and/or
@@ -31,68 +31,40 @@
 #include <stdlib.h>
 #include <pthread.h>
+#include <unistd.h>
+#include <stdint.h>
 
-#include <compiler.h>
-#include <arch.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+#include <urcu/system.h>
+#include <urcu/uatomic_arch.h>
+#include <urcu/list.h>
+#include <urcu/futex.h>
 
-/*
- * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
- */
-#define _LOAD_SHARED(p)	ACCESS_ONCE(p)
-
-/*
- * Load a data from shared memory, doing a cache flush if required.
- */
-#define LOAD_SHARED(p) \
-	({ \
-		smp_rmc(); \
-		_LOAD_SHARED(p); \
-	})
-
-/*
- * Identify a shared store. A smp_wmc() or smp_mc() should follow the store.
- */
-#define _STORE_SHARED(x, v)	({ ACCESS_ONCE(x) = (v); })
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-/*
- * Store v into x, where x is located in shared memory. Performs the required
- * cache flush after writing. Returns v.
- */
-#define STORE_SHARED(x, v) \
-	({ \
-		_STORE_SHARED(x, v); \
-		smp_wmc(); \
-		(v); \
-	})
-
-/**
- * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
- * into a RCU read-side critical section. The pointer can later be safely
- * dereferenced within the critical section.
- *
- * This ensures that the pointer copy is invariant thorough the whole critical
- * section.
- *
- * Inserts memory barriers on architectures that require them (currently only
- * Alpha) and documents which pointers are protected by RCU.
- *
- * The compiler memory barrier in LOAD_SHARED() ensures that value-speculative
- * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the
- * data read before the pointer read by speculating the value of the pointer.
- * Correct ordering is ensured because the pointer is read as a volatile access.
- * This acts as a global side-effect operation, which forbids reordering of
- * dependent memory operations. Note that such concern about dependency-breaking
- * optimizations will eventually be taken care of by the "memory_order_consume"
- * addition to forthcoming C++ standard.
- *
- * Should match rcu_assign_pointer() or rcu_xchg_pointer().
- */
+/* Default is RCU_MEMBARRIER */
+#if !defined(RCU_MEMBARRIER) && !defined(RCU_MB) && !defined(RCU_SIGNAL)
+#define RCU_MEMBARRIER
+#endif
 
-#define _rcu_dereference(p)	({					\
-				typeof(p) _________p1 = LOAD_SHARED(p); \
-				smp_read_barrier_depends();		\
-				(_________p1);				\
-				})
+#ifdef RCU_MEMBARRIER
+#include <unistd.h>
+#include <sys/syscall.h>
+
+/* If the headers do not support SYS_membarrier, statically use RCU_MB */
+#ifdef SYS_membarrier
+# define MEMBARRIER_EXPEDITED		(1 << 0)
+# define MEMBARRIER_DELAYED		(1 << 1)
+# define MEMBARRIER_QUERY		(1 << 16)
+# define membarrier(...)		syscall(SYS_membarrier, __VA_ARGS__)
+#else
+# undef RCU_MEMBARRIER
+# define RCU_MB
+#endif
+#endif
 
 /*
  * This code section can only be included in LGPL 2.1 compatible source code.
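The RCU_MEMBARRIER flavor configured above relies on the sys_membarrier()
system call, which was still an out-of-tree kernel patch when this change was
written, hence both the compile-time SYS_membarrier check and a runtime
fallback. As a sketch (not part of this patch), the runtime side boils down to
a one-time probe in urcu.c, assuming MEMBARRIER_QUERY returns 0 when the
expedited command is supported:

extern int has_sys_membarrier;	/* declared later in this header, defined in urcu.c */

static void rcu_membarrier_probe(void)	/* hypothetical name for the init-time check */
{
	/*
	 * On kernels without the syscall, the call fails (e.g. ENOSYS) and
	 * has_sys_membarrier stays 0, so readers fall back to real barriers.
	 */
	if (!membarrier(MEMBARRIER_EXPEDITED | MEMBARRIER_QUERY))
		has_sys_membarrier = 1;
}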
@@ -104,19 +76,29 @@
 
 /*
  * The signal number used by the RCU library can be overridden with
- * -DSIGURCU= when compiling the library.
+ * -DSIGRCU= when compiling the library.
+ * Provide backward compatibility for liburcu 0.3.x SIGURCU.
  */
-#ifndef SIGURCU
-#define SIGURCU SIGUSR1
+#ifdef SIGURCU
+#define SIGRCU SIGURCU
+#endif
+
+#ifndef SIGRCU
+#define SIGRCU SIGUSR1
 #endif
 
 /*
  * If a reader is really non-cooperative and refuses to commit its
- * urcu_active_readers count to memory (there is no barrier in the reader
+ * rcu_active_readers count to memory (there is no barrier in the reader
  * per-se), kick it after a few loops waiting for it.
  */
 #define KICK_READER_LOOPS 10000
 
+/*
+ * Active attempts to check for reader Q.S. before calling futex().
+ */
+#define RCU_QS_ACTIVE_ATTEMPTS 100
+
 #ifdef DEBUG_RCU
 #define rcu_assert(args...)	assert(args)
 #else
@@ -133,14 +115,13 @@
 #define YIELD_WRITE	(1 << 1)
 
 /*
- * Updates without URCU_MB are much slower. Account this in
- * the delay.
+ * Updates with RCU_SIGNAL are much slower. Account this in the delay.
  */
-#ifdef URCU_MB
+#ifdef RCU_SIGNAL
 /* maximum sleep delay, in us */
-#define MAX_SLEEP 50
-#else
 #define MAX_SLEEP 30000
+#else
+#define MAX_SLEEP 50
 #endif
 
 extern unsigned int yield_active;
@@ -179,143 +160,146 @@ static inline void debug_yield_init(void)
 }
 #endif
 
-#ifdef URCU_MB
-static inline void reader_barrier()
+/*
+ * RCU memory barrier broadcast group. Currently, only broadcast to all process
+ * threads is supported (group 0).
+ *
+ * Slave barriers are only guaranteed to be ordered wrt master barriers.
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered) :
+ *               slave             master
+ *   slave         X                 O
+ *   master        O                 O
+ */
+
+#define MB_GROUP_ALL		0
+#define RCU_MB_GROUP		MB_GROUP_ALL
+
+#ifdef RCU_MEMBARRIER
+extern int has_sys_membarrier;
+
+static inline void smp_mb_slave(int group)
+{
+	if (likely(has_sys_membarrier))
+		cmm_barrier();
+	else
+		cmm_smp_mb();
+}
+#endif
+
+#ifdef RCU_MB
+static inline void smp_mb_slave(int group)
 {
-	smp_mb();
+	cmm_smp_mb();
 }
-#else
-static inline void reader_barrier()
+#endif
+
+#ifdef RCU_SIGNAL
+static inline void smp_mb_slave(int group)
 {
-	barrier();
+	cmm_barrier();
 }
 #endif
 
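Each smp_mb_slave() above pairs with a master-side barrier issued from the
write side in urcu.c (not shown in this diff). The point of RCU_MEMBARRIER is
to reduce the reader's barrier to a compiler barrier and make the rare writer
pay instead, by broadcasting a barrier to all threads of the process. Roughly,
and under the same assumptions as the probe sketched earlier, the matching
master barrier looks like:

static inline void smp_mb_master(int group)
{
	if (likely(has_sys_membarrier))
		(void) membarrier(MEMBARRIER_EXPEDITED);	/* barrier on all threads */
	else
		cmm_smp_mb();
}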
 /*
- * The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a
- * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits.
+ * The trick here is that RCU_GP_CTR_PHASE must be a multiple of 8 so we can use
+ * a full 8-bits, 16-bits or 32-bits bitmask for the lower order bits.
  */
 #define RCU_GP_COUNT		(1UL << 0)
 /* Use the amount of bits equal to half of the architecture long size */
-#define RCU_GP_CTR_BIT		(1UL << (sizeof(long) << 2))
-#define RCU_GP_CTR_NEST_MASK	(RCU_GP_CTR_BIT - 1)
+#define RCU_GP_CTR_PHASE	(1UL << (sizeof(unsigned long) << 2))
+#define RCU_GP_CTR_NEST_MASK	(RCU_GP_CTR_PHASE - 1)
 
 /*
  * Global quiescent period counter with low-order bits unused.
  * Using a int rather than a char to eliminate false register dependencies
  * causing stalls on some architectures.
  */
-extern long urcu_gp_ctr;
+extern unsigned long rcu_gp_ctr;
 
-extern long __thread urcu_active_readers;
+struct rcu_reader {
+	/* Data used by both reader and synchronize_rcu() */
+	unsigned long ctr;
+	char need_mb;
+	/* Data used for registry */
+	struct cds_list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
+	pthread_t tid;
+};
 
-static inline int rcu_old_gp_ongoing(long *value)
+extern struct rcu_reader __thread rcu_reader;
+
+extern int gp_futex;
+
+/*
+ * Wake-up waiting synchronize_rcu(). Called from many concurrent threads.
+ */
+static inline void wake_up_gp(void)
 {
-	long v;
+	if (unlikely(uatomic_read(&gp_futex) == -1)) {
+		uatomic_set(&gp_futex, 0);
+		futex_async(&gp_futex, FUTEX_WAKE, 1,
+		      NULL, NULL, 0);
+	}
+}
+
+static inline int rcu_gp_ongoing(unsigned long *ctr)
+{
+	unsigned long v;
 
-	if (value == NULL)
-		return 0;
 	/*
 	 * Make sure both tests below are done on the same version of *value
 	 * to insure consistency.
 	 */
-	v = LOAD_SHARED(*value);
+	v = CMM_LOAD_SHARED(*ctr);
 	return (v & RCU_GP_CTR_NEST_MASK) &&
-		 ((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT);
+		 ((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE);
 }
 
 static inline void _rcu_read_lock(void)
 {
-	long tmp;
+	unsigned long tmp;
 
-	tmp = urcu_active_readers;
-	/* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */
+	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
+	tmp = rcu_reader.ctr;
+	/*
+	 * rcu_gp_ctr is
+	 *   RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE)
+	 */
 	if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) {
-		_STORE_SHARED(urcu_active_readers, _LOAD_SHARED(urcu_gp_ctr));
+		_CMM_STORE_SHARED(rcu_reader.ctr, _CMM_LOAD_SHARED(rcu_gp_ctr));
 		/*
 		 * Set active readers count for outermost nesting level before
-		 * accessing the pointer. See force_mb_all_threads().
+		 * accessing the pointer. See smp_mb_master().
 		 */
-		reader_barrier();
+		smp_mb_slave(RCU_MB_GROUP);
 	} else {
-		_STORE_SHARED(urcu_active_readers, tmp + RCU_GP_COUNT);
+		_CMM_STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT);
 	}
 }
 
 static inline void _rcu_read_unlock(void)
 {
-	reader_barrier();
+	unsigned long tmp;
+
+	tmp = rcu_reader.ctr;
 	/*
 	 * Finish using rcu before decrementing the pointer.
-	 * See force_mb_all_threads().
-	 * Formally only needed for outermost nesting level, but leave barrier
-	 * in place for nested unlocks to remove a branch from the common case
-	 * (no nesting).
+	 * See smp_mb_master().
 	 */
-	_STORE_SHARED(urcu_active_readers, urcu_active_readers - RCU_GP_COUNT);
+	if (likely((tmp & RCU_GP_CTR_NEST_MASK) == RCU_GP_COUNT)) {
+		smp_mb_slave(RCU_MB_GROUP);
+		_CMM_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+		/* write rcu_reader.ctr before read futex */
+		smp_mb_slave(RCU_MB_GROUP);
+		wake_up_gp();
+	} else {
+		_CMM_STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+	}
+	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
 }
 
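Callers never use _rcu_read_lock()/_rcu_read_unlock() directly; the public
rcu_read_lock()/rcu_read_unlock() map onto these inline fast paths when
_LGPL_SOURCE is defined. For reference, a typical read side looks as follows;
struct mynode, shared_node and read_value() are hypothetical names, and the
reader thread must first have been registered with rcu_register_thread():

#define _LGPL_SOURCE	/* map rcu_read_lock() onto the inline fast path */
#include <urcu.h>

struct mynode {
	int value;
};

static struct mynode *shared_node;	/* published with rcu_assign_pointer() */

static int read_value(void)
{
	struct mynode *node;
	int v;

	rcu_read_lock();
	node = rcu_dereference(shared_node);	/* stays valid until unlock */
	v = node ? node->value : -1;
	rcu_read_unlock();
	return v;
}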
-/**
- * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure
- * meant to be read by RCU read-side critical sections. Returns the assigned
- * value.
- *
- * Documents which pointers will be dereferenced by RCU read-side critical
- * sections and adds the required memory barriers on architectures requiring
- * them. It also makes sure the compiler does not reorder code initializing the
- * data structure before its publication.
- *
- * Should match rcu_dereference_pointer().
- */
-
-#define _rcu_assign_pointer(p, v)	\
-	({ \
-		if (!__builtin_constant_p(v) || \
-		    ((v) != NULL)) \
-			wmb(); \
-		STORE_SHARED(p, v); \
-	})
-
-/**
- * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
- * is as expected by "old". If succeeds, returns the previous pointer to the
- * data structure, which can be safely freed after waiting for a quiescent state
- * using synchronize_rcu(). If fails (unexpected value), returns old (which
- * should not be freed !).
- */
-
-#define _rcu_cmpxchg_pointer(p, old, _new)	\
-	({ \
-		if (!__builtin_constant_p(_new) || \
-		    ((_new) != NULL)) \
-			wmb(); \
-		cmpxchg(p, old, _new); \
-	})
-
-/**
- * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous
- * pointer to the data structure, which can be safely freed after waiting for a
- * quiescent state using synchronize_rcu().
- */
-
-#define _rcu_xchg_pointer(p, v)	\
-	({ \
-		if (!__builtin_constant_p(v) || \
-		    ((v) != NULL)) \
-			wmb(); \
-		xchg(p, v); \
-	})
-
-/*
- * Exchanges the pointer and waits for quiescent state.
- * The pointer returned can be freed.
- */
-#define _rcu_publish_content(p, v)	\
-	({ \
-		void *oldptr; \
-		oldptr = _rcu_xchg_pointer(p, v); \
-		synchronize_rcu(); \
-		oldptr; \
-	})
+#ifdef __cplusplus
+}
+#endif
 
 #endif /* _URCU_STATIC_H */
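The pointer-publication macros removed above were not dropped from the library;
in the tree at this point they live in the urcu-pointer headers. The
_rcu_publish_content() helper, however, bundled a pointer exchange with a grace
period, and callers are now expected to spell out the two steps. A sketch of
the matching update side, reusing the hypothetical struct mynode from the
reader example:

#define _LGPL_SOURCE
#include <stdlib.h>
#include <urcu.h>

static void update_value(struct mynode **nodep, int v)
{
	struct mynode *new_node, *old_node;

	new_node = malloc(sizeof(*new_node));
	if (!new_node)
		return;
	new_node->value = v;
	/* rcu_xchg_pointer() orders the initialization before publication. */
	old_node = rcu_xchg_pointer(nodep, new_node);
	synchronize_rcu();	/* wait until no reader can still hold old_node */
	free(old_node);
}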