#include <urcu/compiler.h>
#include <urcu/arch.h>
+#include <urcu/system.h>
+#include <urcu/uatomic_arch.h>
#include <urcu/list.h>
+#include <urcu/urcu-futex.h>
-/*
- * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
- */
-#define _LOAD_SHARED(p) ACCESS_ONCE(p)
-
-/*
- * Load a data from shared memory, doing a cache flush if required.
- */
-#define LOAD_SHARED(p) \
- ({ \
- smp_rmc(); \
- _LOAD_SHARED(p); \
- })
-
-/*
- * Identify a shared store. A smp_wmc() or smp_mc() should follow the store.
- */
-#define _STORE_SHARED(x, v) ({ ACCESS_ONCE(x) = (v); })
-
-/*
- * Store v into x, where x is located in shared memory. Performs the required
- * cache flush after writing. Returns v.
- */
-#define STORE_SHARED(x, v) \
- ({ \
- _STORE_SHARED(x, v); \
- smp_wmc(); \
- (v); \
- })
-
-/**
- * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
- * into a RCU read-side critical section. The pointer can later be safely
- * dereferenced within the critical section.
- *
- * This ensures that the pointer copy is invariant thorough the whole critical
- * section.
- *
- * Inserts memory barriers on architectures that require them (currently only
- * Alpha) and documents which pointers are protected by RCU.
- *
- * The compiler memory barrier in LOAD_SHARED() ensures that value-speculative
- * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the
- * data read before the pointer read by speculating the value of the pointer.
- * Correct ordering is ensured because the pointer is read as a volatile access.
- * This acts as a global side-effect operation, which forbids reordering of
- * dependent memory operations. Note that such concern about dependency-breaking
- * optimizations will eventually be taken care of by the "memory_order_consume"
- * addition to forthcoming C++ standard.
- *
- * Should match rcu_assign_pointer() or rcu_xchg_pointer().
- */
+#ifdef __cplusplus
+extern "C" {
+#endif
-#define _rcu_dereference(p) ({ \
- typeof(p) _________p1 = LOAD_SHARED(p); \
- smp_read_barrier_depends(); \
- (_________p1); \
- })
+/*
+ * Default is RCU_MEMBARRIER: it is selected when none of RCU_MEMBARRIER,
+ * RCU_MB or RCU_SIGNAL has been defined before this point.
+ */
+#if !defined(RCU_MEMBARRIER) && !defined(RCU_MB) && !defined(RCU_SIGNAL)
+#define RCU_MEMBARRIER
+#endif
-#define futex(...) syscall(__NR_futex, __VA_ARGS__)
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
+#ifdef RCU_MEMBARRIER
+#include <unistd.h>
+#include <sys/syscall.h>
+
+/*
+ * If the headers do not support SYS_membarrier, statically use RCU_MB:
+ * RCU_MEMBARRIER is undefined again and RCU_MB is defined in its place.
+ * Otherwise membarrier() is a thin wrapper around syscall(SYS_membarrier, ...)
+ * and the MEMBARRIER_* flag values are defined here.
+ */
+#ifdef SYS_membarrier
+# define MEMBARRIER_EXPEDITED (1 << 0)
+# define MEMBARRIER_DELAYED (1 << 1)
+# define MEMBARRIER_QUERY (1 << 16)
+# define membarrier(...) syscall(SYS_membarrier, __VA_ARGS__)
+#else
+# undef RCU_MEMBARRIER
+# define RCU_MB
+#endif
+#endif
/*
* This code section can only be included in LGPL 2.1 compatible source code.
/*
* The signal number used by the RCU library can be overridden with
- * -DSIGURCU= when compiling the library.
+ * -DSIGRCU= when compiling the library.
+ * Provide backward compatibility for liburcu 0.3.x SIGURCU.
+ *
+ * When SIGURCU is defined, SIGRCU is defined to it; when SIGRCU is still
+ * undefined after that, it defaults to SIGUSR1.
+ * NOTE(review): SIGRCU is not tested before the SIGURCU alias is applied, so
+ * defining both macros redefines SIGRCU here -- confirm this is intended.
*/
-#ifndef SIGURCU
-#define SIGURCU SIGUSR1
+#ifdef SIGURCU
+#define SIGRCU SIGURCU
+#endif
+
+#ifndef SIGRCU
+#define SIGRCU SIGUSR1
#endif
/*
* If a reader is really non-cooperative and refuses to commit its
- * urcu_active_readers count to memory (there is no barrier in the reader
+ * rcu_active_readers count to memory (there is no barrier in the reader
* per se), kick it after a few loops waiting for it.
*/
#define KICK_READER_LOOPS 10000
#define YIELD_WRITE (1 << 1)
/*
- * Updates without URCU_MB are much slower. Account this in
- * the delay.
+ * Updates with RCU_SIGNAL are much slower. Account for this in the delay:
+ * MAX_SLEEP is 30000 when RCU_SIGNAL is defined and 50 otherwise.
*/
-#ifdef URCU_MB
+#ifdef RCU_SIGNAL
/* maximum sleep delay, in us */
-#define MAX_SLEEP 50
-#else
#define MAX_SLEEP 30000
+#else
+#define MAX_SLEEP 50
#endif
extern unsigned int yield_active;
}
#endif
-#ifdef URCU_MB
-static inline void reader_barrier()
+/*
+ * RCU memory barrier broadcast group. Currently, only broadcast to all process
+ * threads is supported (group 0).
+ *
+ * Slave barriers are only guaranteed to be ordered wrt master barriers.
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered) :
+ *               slave  master
+ *        slave    X      O
+ *        master   O      O
+ *
+ * smp_mb_slave() is the barrier issued by _rcu_read_lock() and
+ * _rcu_read_unlock(). One variant is provided per flavor, and every variant
+ * currently ignores its group argument:
+ *   - RCU_MEMBARRIER: barrier() when has_sys_membarrier is nonzero (the
+ *     expected case), smp_mb() otherwise.
+ *   - RCU_MB: always smp_mb().
+ *   - RCU_SIGNAL: always barrier().
+ */
+
+#define MB_GROUP_ALL 0
+#define RCU_MB_GROUP MB_GROUP_ALL
+
+#ifdef RCU_MEMBARRIER
+extern int has_sys_membarrier;
+
+static inline void smp_mb_slave(int group)
+{
+ if (likely(has_sys_membarrier))
+ barrier();
+ else
+ smp_mb();
+}
+#endif
+
+#ifdef RCU_MB
+static inline void smp_mb_slave(int group)
{
smp_mb();
}
-#else
-static inline void reader_barrier()
+#endif
+
+#ifdef RCU_SIGNAL
+static inline void smp_mb_slave(int group)
{
barrier();
}
#endif
/*
- * The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a
- * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits.
+ * The trick here is that RCU_GP_CTR_PHASE must be a multiple of 8 so we can use
+ * a full 8-bit, 16-bit or 32-bit bitmask for the lower order bits.
+ *
+ * RCU_GP_COUNT is the unit added to or subtracted from a reader's counter for
+ * each nesting level, RCU_GP_CTR_PHASE is the single bit at position
+ * sizeof(unsigned long) * 4, and RCU_GP_CTR_NEST_MASK selects every bit below
+ * RCU_GP_CTR_PHASE.
*/
#define RCU_GP_COUNT (1UL << 0)
/* Use the amount of bits equal to half of the architecture long size */
-#define RCU_GP_CTR_BIT (1UL << (sizeof(long) << 2))
-#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1)
+#define RCU_GP_CTR_PHASE (1UL << (sizeof(unsigned long) << 2))
+#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_PHASE - 1)
/*
* Global quiescent period counter with low-order bits unused.
* Using an int rather than a char to eliminate false register dependencies
* causing stalls on some architectures.
*/
-extern long urcu_gp_ctr;
+extern unsigned long rcu_gp_ctr;
-struct urcu_reader {
+struct rcu_reader {
/*
* Data used by both reader and synchronize_rcu().
* ctr: _rcu_read_lock() stores a snapshot of the global counter here for the
* outermost lock and adds RCU_GP_COUNT for each nested one;
* _rcu_read_unlock() subtracts RCU_GP_COUNT.
* need_mb: not referenced in the visible part of this file -- TODO confirm
* its role against the writer-side code.
*/
- long ctr;
+ unsigned long ctr;
char need_mb;
/*
* Data used for registry.
* head is aligned on CACHE_LINE_SIZE; tid is a pthread_t, presumably that of
* the thread owning this structure -- verify against the registration code.
*/
struct list_head head __attribute__((aligned(CACHE_LINE_SIZE)));
pthread_t tid;
};
-extern struct urcu_reader __thread urcu_reader;
+extern struct rcu_reader __thread rcu_reader;
extern int gp_futex;
{
if (unlikely(uatomic_read(&gp_futex) == -1)) {
uatomic_set(&gp_futex, 0);
- futex(&gp_futex, FUTEX_WAKE, 1,
+ futex_async(&gp_futex, FUTEX_WAKE, 1,
NULL, NULL, 0);
}
}
-static inline int rcu_old_gp_ongoing(long *value)
+static inline int rcu_gp_ongoing(unsigned long *ctr)
{
- long v;
+ unsigned long v;
- if (value == NULL)
- return 0;
/*
* Make sure both tests below are done on the same version of the counter
* to ensure consistency: it is loaded exactly once, into v, with
* LOAD_SHARED().
*
* The result is nonzero when v has a nonzero nesting count (the bits under
* RCU_GP_CTR_NEST_MASK) and its phase bit differs from the phase bit of the
* global counter; it is zero otherwise.
*/
- v = LOAD_SHARED(*value);
+ v = LOAD_SHARED(*ctr);
return (v & RCU_GP_CTR_NEST_MASK) &&
- ((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT);
+ ((v ^ rcu_gp_ctr) & RCU_GP_CTR_PHASE);
}
static inline void _rcu_read_lock(void)
{
- long tmp;
+ unsigned long tmp;
- tmp = urcu_reader.ctr;
- /* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */
+ tmp = rcu_reader.ctr;
+ /*
+ * Enter a read-side critical section for the calling thread (rcu_reader
+ * is a __thread variable).
+ *
+ * rcu_gp_ctr is
+ * RCU_GP_COUNT | (~RCU_GP_CTR_PHASE or RCU_GP_CTR_PHASE)
+ *
+ * Outermost lock (no bit of RCU_GP_CTR_NEST_MASK set in tmp): copy
+ * rcu_gp_ctr into rcu_reader.ctr, then issue smp_mb_slave().
+ * Nested lock: only add RCU_GP_COUNT to the existing count, with no
+ * barrier.
+ */
if (likely(!(tmp & RCU_GP_CTR_NEST_MASK))) {
- _STORE_SHARED(urcu_reader.ctr, _LOAD_SHARED(urcu_gp_ctr));
+ _STORE_SHARED(rcu_reader.ctr, _LOAD_SHARED(rcu_gp_ctr));
/*
* Set active readers count for outermost nesting level before
- * accessing the pointer. See force_mb_all_threads().
+ * accessing the pointer. See smp_mb_master().
*/
- reader_barrier();
+ smp_mb_slave(RCU_MB_GROUP);
} else {
- _STORE_SHARED(urcu_reader.ctr, tmp + RCU_GP_COUNT);
+ _STORE_SHARED(rcu_reader.ctr, tmp + RCU_GP_COUNT);
}
}
static inline void _rcu_read_unlock(void)
{
- long tmp;
+ unsigned long tmp;
- tmp = urcu_reader.ctr;
+ tmp = rcu_reader.ctr;
/*
* Finish using rcu before decrementing the reader count.
- * See force_mb_all_threads().
+ * See smp_mb_master().
*
* Outermost unlock (nesting count in tmp equals RCU_GP_COUNT): a barrier is
* issued on each side of the decrement, then wake_up_gp() is called.
* Nested unlock: the count is decremented with no barrier and no wakeup.
*/
if (likely((tmp & RCU_GP_CTR_NEST_MASK) == RCU_GP_COUNT)) {
- reader_barrier();
- _STORE_SHARED(urcu_reader.ctr, urcu_reader.ctr - RCU_GP_COUNT);
- /* write urcu_reader.ctr before read futex */
- reader_barrier();
+ smp_mb_slave(RCU_MB_GROUP);
+ _STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
+ /* write rcu_reader.ctr before read futex */
+ smp_mb_slave(RCU_MB_GROUP);
wake_up_gp();
} else {
- _STORE_SHARED(urcu_reader.ctr, urcu_reader.ctr - RCU_GP_COUNT);
+ _STORE_SHARED(rcu_reader.ctr, rcu_reader.ctr - RCU_GP_COUNT);
}
}
-/**
- * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure
- * meant to be read by RCU read-side critical sections. Returns the assigned
- * value.
- *
- * Documents which pointers will be dereferenced by RCU read-side critical
- * sections and adds the required memory barriers on architectures requiring
- * them. It also makes sure the compiler does not reorder code initializing the
- * data structure before its publication.
- *
- * Should match rcu_dereference_pointer().
- */
-
-#define _rcu_assign_pointer(p, v) \
- ({ \
- if (!__builtin_constant_p(v) || \
- ((v) != NULL)) \
- wmb(); \
- STORE_SHARED(p, v); \
- })
-
-/**
- * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
- * is as expected by "old". If succeeds, returns the previous pointer to the
- * data structure, which can be safely freed after waiting for a quiescent state
- * using synchronize_rcu(). If fails (unexpected value), returns old (which
- * should not be freed !).
- */
-
-#define _rcu_cmpxchg_pointer(p, old, _new) \
- ({ \
- if (!__builtin_constant_p(_new) || \
- ((_new) != NULL)) \
- wmb(); \
- uatomic_cmpxchg(p, old, _new); \
- })
-
-/**
- * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous
- * pointer to the data structure, which can be safely freed after waiting for a
- * quiescent state using synchronize_rcu().
- */
-
-#define _rcu_xchg_pointer(p, v) \
- ({ \
- if (!__builtin_constant_p(v) || \
- ((v) != NULL)) \
- wmb(); \
- uatomic_xchg(p, v); \
- })
-
-/*
- * Exchanges the pointer and waits for quiescent state.
- * The pointer returned can be freed.
- */
-#define _rcu_publish_content(p, v) \
- ({ \
- void *oldptr; \
- oldptr = _rcu_xchg_pointer(p, v); \
- synchronize_rcu(); \
- oldptr; \
- })
+#ifdef __cplusplus
+}
+#endif
#endif /* _URCU_STATIC_H */