X-Git-Url: http://git.liburcu.org/?p=urcu.git;a=blobdiff_plain;f=urcu-static.h;h=d46613196f5d471b21e584a85c0eaa7f60154e18;hp=66a06df85e0f72080bdb79a991e0f5d3e8691125;hb=9d7e3f89772f08cca26d727f47d44ecd47c94401;hpb=015c702fdc5f2a87110a99111d13da5e91d1f3db diff --git a/urcu-static.h b/urcu-static.h index 66a06df..d466131 100644 --- a/urcu-static.h +++ b/urcu-static.h @@ -31,68 +31,19 @@ #include #include +#include +#include -#include -#include +#include +#include +#include +#include +#include +#include -/* - * Identify a shared load. A smp_rmc() or smp_mc() should come before the load. - */ -#define _LOAD_SHARED(p) ACCESS_ONCE(p) - -/* - * Load a data from shared memory, doing a cache flush if required. - */ -#define LOAD_SHARED(p) \ - ({ \ - smp_rmc(); \ - _LOAD_SHARED(p); \ - }) - -/* - * Identify a shared store. A smp_wmc() or smp_mc() should follow the store. - */ -#define _STORE_SHARED(x, v) ({ ACCESS_ONCE(x) = (v); }) - -/* - * Store v into x, where x is located in shared memory. Performs the required - * cache flush after writing. Returns v. - */ -#define STORE_SHARED(x, v) \ - ({ \ - _STORE_SHARED(x, v); \ - smp_wmc(); \ - (v); \ - }) - -/** - * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable - * into a RCU read-side critical section. The pointer can later be safely - * dereferenced within the critical section. - * - * This ensures that the pointer copy is invariant thorough the whole critical - * section. - * - * Inserts memory barriers on architectures that require them (currently only - * Alpha) and documents which pointers are protected by RCU. - * - * The compiler memory barrier in LOAD_SHARED() ensures that value-speculative - * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the - * data read before the pointer read by speculating the value of the pointer. - * Correct ordering is ensured because the pointer is read as a volatile access. - * This acts as a global side-effect operation, which forbids reordering of - * dependent memory operations. Note that such concern about dependency-breaking - * optimizations will eventually be taken care of by the "memory_order_consume" - * addition to forthcoming C++ standard. - * - * Should match rcu_assign_pointer() or rcu_xchg_pointer(). - */ - -#define _rcu_dereference(p) ({ \ - typeof(p) _________p1 = LOAD_SHARED(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) +#ifdef __cplusplus +extern "C" { +#endif /* * This code section can only be included in LGPL 2.1 compatible source code. @@ -104,19 +55,30 @@ /* * The signal number used by the RCU library can be overridden with - * -DSIGURCU= when compiling the library. + * -DSIGRCU= when compiling the library. */ -#ifndef SIGURCU -#define SIGURCU SIGUSR1 +#ifndef SIGRCU +#define SIGRCU SIGUSR1 #endif /* * If a reader is really non-cooperative and refuses to commit its - * urcu_active_readers count to memory (there is no barrier in the reader + * rcu_active_readers count to memory (there is no barrier in the reader * per-se), kick it after a few loops waiting for it. */ #define KICK_READER_LOOPS 10000 +/* + * Active attempts to check for reader Q.S. before calling futex(). + */ +#define RCU_QS_ACTIVE_ATTEMPTS 100 + +#ifdef DEBUG_RCU +#define rcu_assert(args...) assert(args) +#else +#define rcu_assert(args...) +#endif + #ifdef DEBUG_YIELD #include #include @@ -127,10 +89,10 @@ #define YIELD_WRITE (1 << 1) /* - * Updates without CONFIG_URCU_AVOID_SIGNALS are much slower. Account this in + * Updates without RCU_MB are much slower. Account this in * the delay. */ -#ifdef CONFIG_URCU_AVOID_SIGNALS +#ifdef RCU_MB /* maximum sleep delay, in us */ #define MAX_SLEEP 50 #else @@ -173,35 +135,58 @@ static inline void debug_yield_init(void) } #endif -#ifdef CONFIG_URCU_AVOID_SIGNALS -static inline void reader_barrier() +#ifdef RCU_MB +static inline void smp_mb_light() { smp_mb(); } #else -static inline void reader_barrier() +static inline void smp_mb_light() { barrier(); } #endif /* - * The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a - * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits. + * The trick here is that RCU_GP_CTR_PHASE must be a multiple of 8 so we can use + * a full 8-bits, 16-bits or 32-bits bitmask for the lower order bits. */ #define RCU_GP_COUNT (1UL << 0) /* Use the amount of bits equal to half of the architecture long size */ -#define RCU_GP_CTR_BIT (1UL << (sizeof(long) << 2)) -#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1) +#define RCU_GP_CTR_PHASE (1UL << (sizeof(long) << 2)) +#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_PHASE - 1) /* * Global quiescent period counter with low-order bits unused. * Using a int rather than a char to eliminate false register dependencies * causing stalls on some architectures. */ -extern long urcu_gp_ctr; +extern long rcu_gp_ctr; + +struct rcu_reader { + /* Data used by both reader and synchronize_rcu() */ + long ctr; + char need_mb; + /* Data used for registry */ + struct list_head head __attribute__((aligned(CACHE_LINE_SIZE))); + pthread_t tid; +}; -extern long __thread urcu_active_readers; +extern struct rcu_reader __thread rcu_reader; + +extern int gp_futex; + +/* + * Wake-up waiting synchronize_rcu(). Called from many concurrent threads. + */ +static inline void wake_up_gp(void) +{ + if (unlikely(uatomic_read(&gp_futex) == -1)) { + uatomic_set(&gp_futex, 0); + futex_async(&gp_futex, FUTEX_WAKE, 1, + NULL, NULL, 0); + } +} static inline int rcu_old_gp_ongoing(long *value) { @@ -215,85 +200,52 @@ static inline int rcu_old_gp_ongoing(long *value) */ v = LOAD_SHARED(*value); return (v & RCU_GP_CTR_NEST_MASK) && - ((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT); + ((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE); } static inline void _rcu_read_lock(void) { long tmp; - tmp = urcu_active_readers; - /* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */ + tmp = rcu_reader.ctr; + /* + * rcu_gp_ctr is + * RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE) + */ if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) { - _STORE_SHARED(urcu_active_readers, _LOAD_SHARED(urcu_gp_ctr)); + _STORE_SHARED(rcu_reader.ctr, _LOAD_SHARED(rcu_gp_ctr)); /* * Set active readers count for outermost nesting level before - * accessing the pointer. See force_mb_all_threads(). + * accessing the pointer. See smp_mb_heavy(). */ - reader_barrier(); + smp_mb_light(); } else { - _STORE_SHARED(urcu_active_readers, tmp + RCU_GP_COUNT); + _STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT); } } static inline void _rcu_read_unlock(void) { - reader_barrier(); + long tmp; + + tmp = rcu_reader.ctr; /* * Finish using rcu before decrementing the pointer. - * See force_mb_all_threads(). - * Formally only needed for outermost nesting level, but leave barrier - * in place for nested unlocks to remove a branch from the common case - * (no nesting). + * See smp_mb_heavy(). */ - _STORE_SHARED(urcu_active_readers, urcu_active_readers - RCU_GP_COUNT); + if (likely((tmp & RCU_GP_CTR_NEST_MASK) == RCU_GP_COUNT)) { + smp_mb_light(); + _STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT); + /* write rcu_reader.ctr before read futex */ + smp_mb_light(); + wake_up_gp(); + } else { + _STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT); + } } -/** - * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure - * meant to be read by RCU read-side critical sections. Returns the assigned - * value. - * - * Documents which pointers will be dereferenced by RCU read-side critical - * sections and adds the required memory barriers on architectures requiring - * them. It also makes sure the compiler does not reorder code initializing the - * data structure before its publication. - * - * Should match rcu_dereference_pointer(). - */ - -#define _rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - wmb(); \ - STORE_SHARED(p, v); \ - }) - -/** - * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous - * pointer to the data structure, which can be safely freed after waiting for a - * quiescent state using synchronize_rcu(). - */ - -#define _rcu_xchg_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - wmb(); \ - xchg(p, v); \ - }) - -/* - * Exchanges the pointer and waits for quiescent state. - * The pointer returned can be freed. - */ -#define _rcu_publish_content(p, v) \ - ({ \ - void *oldptr; \ - oldptr = _rcu_xchg_pointer(p, v); \ - synchronize_rcu(); \ - oldptr; \ - }) +#ifdef __cplusplus +} +#endif #endif /* _URCU_STATIC_H */