[urcu.git] / urcu.h

#ifndef _URCU_H
#define _URCU_H

/*
 * urcu.h
 *
 * Userspace RCU header
 *
 * Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
 *
 * Credits for Paul e. McKenney <paulmck@linux.vnet.ibm.com>
 * for inspiration coming from the Linux kernel RCU and rcu-preempt.
 *
 * The barrier, mb, rmb, wmb, atomic_inc, smp_read_barrier_depends, ACCESS_ONCE
 * and rcu_dereference primitives come from the Linux kernel.
 *
 * Distributed under GPLv2
 */

#include <stdlib.h>
#include <pthread.h>

/*
 * Compiler barrier: an empty asm statement carrying a "memory" clobber.
 * The "volatile" qualifier is kept because of gcc bugs.
 */
#define barrier()	__asm__ __volatile__ ("" : : : "memory")

/* Branch hints: tell the compiler which truth value of x is expected. */
#define likely(x)	__builtin_expect(((x) ? 1 : 0), 1)
#define unlikely(x)	__builtin_expect(((x) ? 1 : 0), 0)

/*
 * Assume the architecture has coherent caches. Blackfin will want this unset.
 * While it is set, mc()/rmc()/wmc() are plain compiler barriers.
 */
#define CONFIG_HAVE_MEM_COHERENCY 1

/*
 * Assume P4 or newer: selects the mfence/lfence/sfence flavour of
 * mb()/rmb()/wmb().
 */
#define CONFIG_HAVE_FENCE 1

/*
 * Assume SMP machine, given we don't have this information. While it is set,
 * the smp_*() primitives map to the real barriers instead of barrier().
 */
#define CONFIG_SMP 1


/*
 * Cache flush primitives: mc() (both directions), rmc() (before reads) and
 * wmc() (after writes).
 */
#ifndef CONFIG_HAVE_MEM_COHERENCY
/*
 * Non-coherent caches: the architecture has to provide real flush
 * primitives, so the build is refused until it does.
 */
#error "The architecture must create its own cache flush primitives"
#define mc()	arch_cache_flush()
#define rmc()	arch_cache_flush_read()
#define wmc()	arch_cache_flush_write()
#else
/*
 * Coherent caches never need flushing; a compiler barrier is sufficient.
 */
#define mc()	barrier()
#define rmc()	barrier()
#define wmc()	barrier()
#endif


/*
 * Hardware memory barriers: mb(), rmb() and wmb().
 *
 * The asm statements are spelled __asm__ __volatile__ (as barrier() already
 * is) instead of "asm volatile": plain "asm" is not a keyword in strict ISO
 * C modes (-std=c99, -std=c11), which would otherwise break every user of
 * this header built that way.
 */
#ifdef CONFIG_HAVE_MEM_COHERENCY

/* x86 32/64 specific */
#ifdef CONFIG_HAVE_FENCE
/* Fence instructions are available (see CONFIG_HAVE_FENCE). */
#define mb()    __asm__ __volatile__("mfence":::"memory")
#define rmb()   __asm__ __volatile__("lfence":::"memory")
#define wmb()   __asm__ __volatile__("sfence"::: "memory")
#else
/*
 * Some non-Intel clones support out of order store. wmb() ceases to be a
 * nop for these.
 *
 * NOTE(review): the locked add addresses the stack through %esp; a 64-bit
 * build would presumably need %rsp here -- confirm before unsetting
 * CONFIG_HAVE_FENCE on x86-64.
 */
#define mb()    __asm__ __volatile__("lock; addl $0,0(%%esp)":::"memory")
#define rmb()   __asm__ __volatile__("lock; addl $0,0(%%esp)":::"memory")
#define wmb()   __asm__ __volatile__("lock; addl $0,0(%%esp)"::: "memory")
#endif

#else /* !CONFIG_HAVE_MEM_COHERENCY */

/*
 * Without cache coherency, the memory barriers become cache flushes.
 */
#define mb()    mc()
#define rmb()   rmc()
#define wmb()   wmc()

#endif /* !CONFIG_HAVE_MEM_COHERENCY */


/*
 * smp_*() flavours of the barrier and cache flush primitives: the real
 * thing when CONFIG_SMP is set, a plain compiler barrier otherwise.
 */
#ifndef CONFIG_SMP
#define smp_mb()	barrier()
#define smp_rmb()	barrier()
#define smp_wmb()	barrier()
#define smp_mc()	barrier()
#define smp_rmc()	barrier()
#define smp_wmc()	barrier()
#else
#define smp_mb()	mb()
#define smp_rmb()	rmb()
#define smp_wmb()	wmb()
#define smp_mc()	mc()
#define smp_rmc()	rmc()
#define smp_wmc()	wmc()
#endif

/*
 * REP NOP (PAUSE) is a good thing to insert into busy-wait loops.
 *
 * Spelled __asm__ __volatile__ (like barrier()) so the header still compiles
 * in strict ISO C modes, where plain "asm" is not a keyword.
 */
static inline void rep_nop(void)
{
	__asm__ __volatile__("rep; nop" : : : "memory");
}

/* Hint for the body of a spin loop; here it simply issues rep_nop(). */
static inline void cpu_relax(void)
{
	rep_nop();
}

/*
 * atomic_inc - add one to *v with a lock-prefixed incl.
 * @v: address of the int to increment
 *
 * Spelled __asm__ __volatile__ (like barrier()) so the header still compiles
 * in strict ISO C modes, where plain "asm" is not a keyword.
 */
static inline void atomic_inc(int *v)
{
	__asm__ __volatile__("lock; incl %0"
			     : "+m" (*v));
}

/*
 * xchg - store v into *ptr and yield what *ptr held before, cast back to the
 * pointed-to type. The operand width is sizeof(*(ptr)).
 */
#define xchg(ptr, v)							\
	((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr))))

/*
 * Oversized dummy type used by __xg() to describe the "m" operand of the
 * xchg instructions below (presumably so the compiler makes no assumption
 * about how much of the target is accessed -- TODO confirm).
 */
struct __xchg_dummy {
	unsigned long a[100];
};
#define __xg(x) ((struct __xchg_dummy *)(x))

/*
 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
 * Note 2: xchg has side effect, so that attribute volatile is necessary,
 *	  but generally the primitive is invalid, *ptr is output argument. --ANK
 * x is considered local, ptr is considered remote.
 *
 * Exchanges x with the size-byte object at ptr and returns the previous
 * content. For a size not handled below nothing is exchanged and x is
 * returned as is. smp_wmc() is issued before returning.
 *
 * The asm statements are spelled __asm__ __volatile__ so the header builds
 * in strict ISO C modes. The 8-byte case is only compiled where long is
 * 64 bits wide (__LP64__): xchgq cannot be assembled with the 32-bit
 * register that would otherwise hold x.
 */
static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
				   int size)
{
	switch (size) {
	case 1:
		__asm__ __volatile__("xchgb %b0,%1"
				     : "=q" (x)
				     : "m" (*__xg(ptr)), "0" (x)
				     : "memory");
		break;
	case 2:
		__asm__ __volatile__("xchgw %w0,%1"
				     : "=r" (x)
				     : "m" (*__xg(ptr)), "0" (x)
				     : "memory");
		break;
	case 4:
		__asm__ __volatile__("xchgl %k0,%1"
				     : "=r" (x)
				     : "m" (*__xg(ptr)), "0" (x)
				     : "memory");
		break;
#ifdef __LP64__
	case 8:
		__asm__ __volatile__("xchgq %0,%1"
				     : "=r" (x)
				     : "m" (*__xg(ptr)), "0" (x)
				     : "memory");
		break;
#endif
	default:
		/* Unsupported width: leave *ptr alone. */
		break;
	}
	smp_wmc();
	return x;
}

/*
 * Nop everywhere except on alpha. Expands to nothing in this header;
 * rcu_dereference() invokes it right after loading the pointer.
 */
#define smp_read_barrier_depends()

/*
 * Prevent the compiler from merging or refetching accesses.  The compiler
 * is also forbidden from reordering successive instances of ACCESS_ONCE(),
 * but only when the compiler is aware of some particular ordering.  One way
 * to make the compiler aware of ordering is to put the two invocations of
 * ACCESS_ONCE() in different C statements.
 *
 * This macro does absolutely -nothing- to prevent the CPU from reordering,
 * merging, or refetching absolutely anything at any time.  Its main intended
 * use is to mediate communication between process-level code and irq/NMI
 * handlers, all running on the same CPU.
 *
 * The result is an lvalue: x accessed through a volatile-qualified pointer.
 * __typeof__ is used (as in xchg()) rather than typeof, which is not a
 * keyword in strict ISO C modes such as -std=c11.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

/*
 * Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
 * The load itself goes through ACCESS_ONCE(); no cache flush is issued here.
 */
#define _LOAD_SHARED(p)	       ACCESS_ONCE(p)

/*
 * LOAD_SHARED - read p from shared memory.
 *
 * smp_rmc() is issued first (a cache flush where one is required), then the
 * value is fetched with _LOAD_SHARED() and becomes the value of the
 * expression.
 */
#define LOAD_SHARED(p)	({ smp_rmc(); _LOAD_SHARED(p); })


/*
 * _STORE_SHARED - mark the assignment of v to x as a store to shared memory.
 *
 * No flush is performed here: the store is meant to be followed by
 * smp_wmc() or smp_mc().
 */
#define _STORE_SHARED(x, v)	do { (x) = (v); } while (0)

/*
 * STORE_SHARED - write v to x, x living in shared memory.
 *
 * The assignment is done through _STORE_SHARED() and is followed by
 * smp_wmc(), the cache flush required after the write.
 */
#define STORE_SHARED(x, v)	do { _STORE_SHARED(x, v); smp_wmc(); } while (0)

/**
 * rcu_dereference - fetch an RCU-protected pointer in an
 * RCU read-side critical section.  This pointer may later
 * be safely dereferenced.
 *
 * Inserts memory barriers on architectures that require them
 * (currently only the Alpha), and, more importantly, documents
 * exactly which pointers are protected by RCU.
 *
 * p is read once through LOAD_SHARED() into a local whose heavily
 * underscored name keeps it from clashing with caller identifiers; that
 * local is the value of the expression. __typeof__ is used (as in xchg())
 * because typeof is not a keyword in strict ISO C modes such as -std=c11.
 */

#define rcu_dereference(p)     ({ \
				__typeof__(p) _________p1 = LOAD_SHARED(p); \
				smp_read_barrier_depends(); \
				(_________p1); \
				})

/*
 * Signal number referred to as SIGURCU throughout.
 * NOTE(review): SIGUSR1 is declared by <signal.h>, which this header does not
 * include itself -- confirm that users of SIGURCU include it.
 */
#define SIGURCU SIGUSR1

/*
 * If a reader is really non-cooperative and refuses to commit its
 * urcu_active_readers count to memory (there is no barrier in the reader
 * per-se), kick it after a few loops waiting for it.
 *
 * The value is a number of wait-loop iterations; the loop that consumes it
 * is not part of this header.
 */
#define KICK_READER_LOOPS 10000

/*
 * Debug yield hooks.
 *
 * With DEBUG_YIELD defined, debug_yield_read()/debug_yield_write() sleep for
 * a random delay when the matching bit of yield_active is set, and
 * debug_yield_init() seeds the per-thread random state. Without DEBUG_YIELD
 * all three are empty functions.
 */
#ifdef DEBUG_YIELD
#include <sched.h>
#include <time.h>
#include <pthread.h>
#include <unistd.h>

/* Bits of yield_active. */
#define YIELD_READ	(1 << 0)
#define YIELD_WRITE	(1 << 1)

/* Updates without DEBUG_FULL_MB are much slower. Account this in the delay */
#ifdef DEBUG_FULL_MB
/* maximum sleep delay, in us */
#define MAX_SLEEP 50
#else
#define MAX_SLEEP 30000
#endif

extern unsigned int yield_active;
extern unsigned int __thread rand_yield;

/*
 * When YIELD_READ is set in yield_active, sleep a random delay below
 * MAX_SLEEP us whenever the low bit of the next random number is set.
 */
static inline void debug_yield_read(void)
{
	if ((yield_active & YIELD_READ) && (rand_r(&rand_yield) & 0x1))
		usleep(rand_r(&rand_yield) % MAX_SLEEP);
}

/* Same as debug_yield_read(), controlled by the YIELD_WRITE bit. */
static inline void debug_yield_write(void)
{
	if ((yield_active & YIELD_WRITE) && (rand_r(&rand_yield) & 0x1))
		usleep(rand_r(&rand_yield) % MAX_SLEEP);
}

/*
 * Seed this thread's rand_yield from the current time and the thread id.
 *
 * pthread_t is an opaque type, so it is converted explicitly before being
 * mixed in rather than being used directly as an operand of ^; where
 * pthread_t is an unsigned long the result is unchanged.
 */
static inline void debug_yield_init(void)
{
	rand_yield = (unsigned int)time(NULL) ^
		     (unsigned int)(unsigned long)pthread_self();
}
#else
static inline void debug_yield_read(void)
{
}

static inline void debug_yield_write(void)
{
}

static inline void debug_yield_init(void)
{
}
#endif

/*
 * reader_barrier - barrier issued by rcu_read_lock() and rcu_read_unlock().
 *
 * A full smp_mb() when DEBUG_FULL_MB is defined, a compiler barrier()
 * otherwise. Declared with (void), like the other inline functions of this
 * header, so that it has a real prototype instead of an unspecified
 * parameter list.
 */
#ifdef DEBUG_FULL_MB
static inline void reader_barrier(void)
{
	smp_mb();
}
#else
static inline void reader_barrier(void)
{
	barrier();
}
#endif

/*
 * Counter layout.
 *
 * RCU_GP_COUNT is the unit added per nesting level, RCU_GP_CTR_BIT is a
 * single bit located at half the width of a long, and RCU_GP_CTR_NEST_MASK
 * covers every bit below it.
 *
 * The trick here is that the position of RCU_GP_CTR_BIT must be a multiple
 * of 8 so we can use a full 8-bits, 16-bits or 32-bits bitmask for the lower
 * order bits.
 */
#define RCU_GP_COUNT		(1UL)
/* sizeof(long) bytes * 4 == half of the bits of a long */
#define RCU_GP_CTR_BIT		(1UL << (4 * sizeof(long)))
#define RCU_GP_CTR_NEST_MASK	(RCU_GP_CTR_BIT - 1UL)

/*
 * Global quiescent period counter with low-order bits unused.
 * Using a full-width integer (a long) rather than a char to eliminate false
 * register dependencies causing stalls on some architectures.
 */
extern long urcu_gp_ctr;

/*
 * Per-thread reader counter, written by rcu_read_lock()/rcu_read_unlock().
 */
extern long __thread urcu_active_readers;

/*
 * rcu_old_gp_ongoing - compare a reader counter with the global one.
 * @value: address of the reader counter to inspect, may be NULL
 *
 * Returns 1 when the sampled counter has a non-zero nesting count (bits of
 * RCU_GP_CTR_NEST_MASK) and its RCU_GP_CTR_BIT differs from the one of
 * urcu_gp_ctr; returns 0 otherwise, including for a NULL @value.
 */
static inline int rcu_old_gp_ongoing(long *value)
{
	long snapshot;

	if (!value)
		return 0;
	/*
	 * Sample *value exactly once so that both tests below look at the
	 * same version of it.
	 */
	snapshot = LOAD_SHARED(*value);
	if (!(snapshot & RCU_GP_CTR_NEST_MASK))
		return 0;
	return ((snapshot ^ urcu_gp_ctr) & RCU_GP_CTR_BIT) != 0;
}

/*
 * rcu_read_lock - enter a read-side critical section.
 *
 * Outermost entry (nesting bits of urcu_active_readers all zero): copy the
 * current urcu_gp_ctr into this thread's urcu_active_readers. Nested entry:
 * add RCU_GP_COUNT to the value already there. reader_barrier() is issued
 * last.
 */
static inline void rcu_read_lock(void)
{
	long tmp;

	tmp = urcu_active_readers;
	/* urcu_gp_ctr = RCU_GP_COUNT | (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */
	/*
	 * The data dependency "read urcu_gp_ctr, write urcu_active_readers",
	 * serializes those two memory operations. The memory barrier in the
	 * signal handler ensures we receive the proper memory commit barriers
	 * required by _STORE_SHARED and _LOAD_SHARED whenever communication
	 * with the writer is needed.
	 */
	if (likely(!(tmp & RCU_GP_CTR_NEST_MASK)))
		_STORE_SHARED(urcu_active_readers, _LOAD_SHARED(urcu_gp_ctr));
	else
		_STORE_SHARED(urcu_active_readers, tmp + RCU_GP_COUNT);
	/*
	 * Increment active readers count before accessing the pointer.
	 * See force_mb_all_threads().
	 */
	reader_barrier();
}

/*
 * rcu_read_unlock - leave a read-side critical section.
 *
 * Issues reader_barrier(), then subtracts RCU_GP_COUNT from this thread's
 * urcu_active_readers.
 */
static inline void rcu_read_unlock(void)
{
	reader_barrier();
	/*
	 * Finish using rcu before decrementing the active readers count.
	 * See force_mb_all_threads().
	 */
	_STORE_SHARED(urcu_active_readers, urcu_active_readers - RCU_GP_COUNT);
}

/**
 * rcu_assign_pointer - assign (publicize) a pointer to a newly
 * initialized structure that will be dereferenced by RCU read-side
 * critical sections.  Returns the value assigned.
 *
 * Inserts memory barriers on architectures that require them
 * (pretty much all of them other than x86), and also prevents
 * the compiler from reordering the code that initializes the
 * structure after the pointer assignment.  More importantly, this
 * call documents which pointers will be dereferenced by RCU read-side
 * code.
 *
 * wmb() is skipped only when v is a compile-time constant NULL. The new
 * value is captured once in a local (underscored so it cannot clash with
 * caller identifiers), stored with STORE_SHARED() and then yielded as the
 * value of the expression, so that the macro really returns the value
 * assigned instead of having type void.
 */

#define rcu_assign_pointer(p, v) \
	({ \
		__typeof__(p) _________pv = (v); \
		if (!__builtin_constant_p(v) || \
		    ((v) != NULL)) \
			wmb(); \
		STORE_SHARED(p, _________pv); \
		_________pv; \
	})

/*
 * rcu_xchg_pointer - exchange the pointer stored at p with v.
 * @p: address of the pointer, handed to xchg() as is
 * @v: new value
 *
 * wmb() is issued first, except when v is a compile-time constant NULL.
 * The value of the expression is what xchg() returns.
 */
#define rcu_xchg_pointer(p, v) \
	({ \
		if (!(__builtin_constant_p(v) && ((v) == NULL))) \
			wmb(); \
		xchg(p, v); \
	})

/*
 * Defined outside this header. urcu_publish_content() calls it after
 * exchanging the pointer and before handing back the old one.
 */
extern void synchronize_rcu(void);

/*
 * Exchanges the pointer and waits for quiescent state.
 * The pointer returned can be freed.
 *
 * p is the address of the pointer (it is passed on to rcu_xchg_pointer()),
 * v the new value. The previous pointer is yielded as a void *.
 *
 * The temporary carries a heavily underscored name, like the one in
 * rcu_dereference(): with a plain name such as "oldptr", a caller passing
 * its own variable of that name as p or v would silently refer to this
 * uninitialized local instead.
 */
#define urcu_publish_content(p, v) \
	({ \
		void *_________oldptr; \
		_________oldptr = rcu_xchg_pointer(p, v); \
		synchronize_rcu(); \
		_________oldptr; \
	})

/*
 * Reader thread registration.
 * Both functions are defined outside this header.
 */
extern void urcu_register_thread(void);
extern void urcu_unregister_thread(void);

#endif /* _URCU_H */
Commit	Line	Data
	1	#ifndef _URCU_H
	2	#define _URCU_H
	3
	4	/*
	5	* urcu.h
	6	*
	7	* Userspace RCU header
	8	*
	9	* Copyright February 2009 - Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
	10	*
	11	* Credits for Paul e. McKenney <paulmck@linux.vnet.ibm.com>
	12	* for inspiration coming from the Linux kernel RCU and rcu-preempt.
	13	*
	14	* The barrier, mb, rmb, wmb, atomic_inc, smp_read_barrier_depends, ACCESS_ONCE
	15	* and rcu_dereference primitives come from the Linux kernel.
	16	*
	17	* Distributed under GPLv2
	18	*/
	19
	20	#include <stdlib.h>
	21	#include <pthread.h>
	22
	23	/* The "volatile" is due to gcc bugs */
	24	#define barrier() __asm__ __volatile__("": : :"memory")
	25
	26	#define likely(x) __builtin_expect(!!(x), 1)
	27	#define unlikely(x) __builtin_expect(!!(x), 0)
	28
	29	/*
	30	* Assume the architecture has coherent caches. Blackfin will want this unset.
	31	*/
	32	#define CONFIG_HAVE_MEM_COHERENCY 1
	33
	34	/* Assume P4 or newer */
	35	#define CONFIG_HAVE_FENCE 1
	36
	37	/* Assume SMP machine, given we don't have this information */
	38	#define CONFIG_SMP 1
	39
	40
	41	#ifdef CONFIG_HAVE_MEM_COHERENCY
	42	/*
	43	* Caches are coherent, no need to flush them.
	44	*/
	45	#define mc() barrier()
	46	#define rmc() barrier()
	47	#define wmc() barrier()
	48	#else
	49	#error "The architecture must create its own cache flush primitives"
	50	#define mc() arch_cache_flush()
	51	#define rmc() arch_cache_flush_read()
	52	#define wmc() arch_cache_flush_write()
	53	#endif
	54
	55
	56	#ifdef CONFIG_HAVE_MEM_COHERENCY
	57
	58	/* x86 32/64 specific */
	59	#ifdef CONFIG_HAVE_FENCE
	60	#define mb() asm volatile("mfence":::"memory")
	61	#define rmb() asm volatile("lfence":::"memory")
	62	#define wmb() asm volatile("sfence"::: "memory")
	63	#else
	64	/*
	65	* Some non-Intel clones support out of order store. wmb() ceases to be a
	66	* nop for these.
	67	*/
	68	#define mb() asm volatile("lock; addl $0,0(%%esp)":::"memory")
	69	#define rmb() asm volatile("lock; addl $0,0(%%esp)":::"memory")
	70	#define wmb() asm volatile("lock; addl $0,0(%%esp)"::: "memory")
	71	#endif
	72
	73	#else /* !CONFIG_HAVE_MEM_COHERENCY */
	74
	75	/*
	76	* Without cache coherency, the memory barriers become cache flushes.
	77	*/
	78	#define mb() mc()
	79	#define rmb() rmc()
	80	#define wmb() wmc()
	81
	82	#endif /* !CONFIG_HAVE_MEM_COHERENCY */
	83
	84
	85	#ifdef CONFIG_SMP
	86	#define smp_mb() mb()
	87	#define smp_rmb() rmb()
	88	#define smp_wmb() wmb()
	89	#define smp_mc() mc()
	90	#define smp_rmc() rmc()
	91	#define smp_wmc() wmc()
	92	#else
	93	#define smp_mb() barrier()
	94	#define smp_rmb() barrier()
	95	#define smp_wmb() barrier()
	96	#define smp_mc() barrier()
	97	#define smp_rmc() barrier()
	98	#define smp_wmc() barrier()
	99	#endif
	100
	101	/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
	102	static inline void rep_nop(void)
	103	{
	104	asm volatile("rep; nop" ::: "memory");
	105	}
	106
	107	static inline void cpu_relax(void)
	108	{
	109	rep_nop();
	110	}
	111
	112	static inline void atomic_inc(int *v)
	113	{
	114	asm volatile("lock; incl %0"
	115	: "+m" (*v));
	116	}
	117
	118	#define xchg(ptr, v) \
	119	((__typeof__((ptr)))__xchg((unsigned long)(v), (ptr), sizeof((ptr))))
	120
	121	struct __xchg_dummy {
	122	unsigned long a[100];
	123	};
	124	#define __xg(x) ((struct __xchg_dummy *)(x))
	125
	126	/*
	127	* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
	128	* Note 2: xchg has side effect, so that attribute volatile is necessary,
	129	* but generally the primitive is invalid, *ptr is output argument. --ANK
	130	* x is considered local, ptr is considered remote.
	131	*/
	132	static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
	133	int size)
	134	{
	135	switch (size) {
	136	case 1:
	137	asm volatile("xchgb %b0,%1"
	138	: "=q" (x)
	139	: "m" (*__xg(ptr)), "0" (x)
	140	: "memory");
	141	break;
	142	case 2:
	143	asm volatile("xchgw %w0,%1"
	144	: "=r" (x)
	145	: "m" (*__xg(ptr)), "0" (x)
	146	: "memory");
	147	break;
	148	case 4:
	149	asm volatile("xchgl %k0,%1"
	150	: "=r" (x)
	151	: "m" (*__xg(ptr)), "0" (x)
	152	: "memory");
	153	break;
	154	case 8:
	155	asm volatile("xchgq %0,%1"
	156	: "=r" (x)
	157	: "m" (*__xg(ptr)), "0" (x)
	158	: "memory");
	159	break;
	160	}
	161	smp_wmc();
	162	return x;
	163	}
	164
	165	/* Nop everywhere except on alpha. */
	166	#define smp_read_barrier_depends()
	167
	168	/*
	169	* Prevent the compiler from merging or refetching accesses. The compiler
	170	* is also forbidden from reordering successive instances of ACCESS_ONCE(),
	171	* but only when the compiler is aware of some particular ordering. One way
	172	* to make the compiler aware of ordering is to put the two invocations of
	173	* ACCESS_ONCE() in different C statements.
	174	*
	175	* This macro does absolutely -nothing- to prevent the CPU from reordering,
	176	* merging, or refetching absolutely anything at any time. Its main intended
	177	* use is to mediate communication between process-level code and irq/NMI
	178	* handlers, all running on the same CPU.
	179	*/
	180	#define ACCESS_ONCE(x) ((volatile typeof(x) )&(x))
	181
	182	/*
	183	* Identify a shared load. A smp_rmc() or smp_mc() should come before the load.
	184	*/
	185	#define _LOAD_SHARED(p) ACCESS_ONCE(p)
	186
	187	/*
	188	* Load a data from shared memory, doing a cache flush if required.
	189	*/
	190	#define LOAD_SHARED(p) \
	191	({ \
	192	smp_rmc(); \
	193	_LOAD_SHARED(p); \
	194	})
	195
	196
	197	/*
	198	* Identify a shared store. A smp_wmc() or smp_mc() should follow the store.
	199	*/
	200	#define _STORE_SHARED(x, v) \
	201	do { \
	202	(x) = (v); \
	203	} while (0)
	204
	205	/*
	206	* Store v into x, where x is located in shared memory. Performs the required
	207	* cache flush after writing.
	208	*/
	209	#define STORE_SHARED(x, v) \
	210	do { \
	211	_STORE_SHARED(x, v); \
	212	smp_wmc(); \
	213	} while (0)
	214
	215	/**
	216	* rcu_dereference - fetch an RCU-protected pointer in an
	217	* RCU read-side critical section. This pointer may later
	218	* be safely dereferenced.
	219	*
	220	* Inserts memory barriers on architectures that require them
	221	* (currently only the Alpha), and, more importantly, documents
	222	* exactly which pointers are protected by RCU.
	223	*/
	224
	225	#define rcu_dereference(p) ({ \
	226	typeof(p) _________p1 = LOAD_SHARED(p); \
	227	smp_read_barrier_depends(); \
	228	(_________p1); \
	229	})
	230
	231	#define SIGURCU SIGUSR1
	232
	233	/*
	234	* If a reader is really non-cooperative and refuses to commit its
	235	* urcu_active_readers count to memory (there is no barrier in the reader
	236	* per-se), kick it after a few loops waiting for it.
	237	*/
	238	#define KICK_READER_LOOPS 10000
	239
	240	#ifdef DEBUG_YIELD
	241	#include <sched.h>
	242	#include <time.h>
	243	#include <pthread.h>
	244	#include <unistd.h>
	245
	246	#define YIELD_READ (1 << 0)
	247	#define YIELD_WRITE (1 << 1)
	248
	249	/* Updates without DEBUG_FULL_MB are much slower. Account this in the delay */
	250	#ifdef DEBUG_FULL_MB
	251	/* maximum sleep delay, in us */
	252	#define MAX_SLEEP 50
	253	#else
	254	#define MAX_SLEEP 30000
	255	#endif
	256
	257	extern unsigned int yield_active;
	258	extern unsigned int __thread rand_yield;
	259
	260	static inline void debug_yield_read(void)
	261	{
	262	if (yield_active & YIELD_READ)
	263	if (rand_r(&rand_yield) & 0x1)
	264	usleep(rand_r(&rand_yield) % MAX_SLEEP);
	265	}
	266
	267	static inline void debug_yield_write(void)
	268	{
	269	if (yield_active & YIELD_WRITE)
	270	if (rand_r(&rand_yield) & 0x1)
	271	usleep(rand_r(&rand_yield) % MAX_SLEEP);
	272	}
	273
	274	static inline void debug_yield_init(void)
	275	{
	276	rand_yield = time(NULL) ^ pthread_self();
	277	}
	278	#else
	279	static inline void debug_yield_read(void)
	280	{
	281	}
	282
	283	static inline void debug_yield_write(void)
	284	{
	285	}
	286
	287	static inline void debug_yield_init(void)
	288	{
	289
	290	}
	291	#endif
	292
	293	#ifdef DEBUG_FULL_MB
	294	static inline void reader_barrier()
	295	{
	296	smp_mb();
	297	}
	298	#else
	299	static inline void reader_barrier()
	300	{
	301	barrier();
	302	}
	303	#endif
	304
	305	/*
	306	* The trick here is that RCU_GP_CTR_BIT must be a multiple of 8 so we can use a
	307	* full 8-bits, 16-bits or 32-bits bitmask for the lower order bits.
	308	*/
	309	#define RCU_GP_COUNT (1UL << 0)
	310	/* Use the amount of bits equal to half of the architecture long size */
	311	#define RCU_GP_CTR_BIT (1UL << (sizeof(long) << 2))
	312	#define RCU_GP_CTR_NEST_MASK (RCU_GP_CTR_BIT - 1)
	313
	314	/*
	315	* Global quiescent period counter with low-order bits unused.
	316	* Using a int rather than a char to eliminate false register dependencies
	317	* causing stalls on some architectures.
	318	*/
	319	extern long urcu_gp_ctr;
	320
	321	extern long __thread urcu_active_readers;
	322
	323	static inline int rcu_old_gp_ongoing(long *value)
	324	{
	325	long v;
	326
	327	if (value == NULL)
	328	return 0;
	329	/*
	330	* Make sure both tests below are done on the same version of *value
	331	* to insure consistency.
	332	*/
	333	v = LOAD_SHARED(*value);
	334	return (v & RCU_GP_CTR_NEST_MASK) &&
	335	((v ^ urcu_gp_ctr) & RCU_GP_CTR_BIT);
	336	}
	337
	338	static inline void rcu_read_lock(void)
	339	{
	340	long tmp;
	341
	342	tmp = urcu_active_readers;
	343	/* urcu_gp_ctr = RCU_GP_COUNT \| (~RCU_GP_CTR_BIT or RCU_GP_CTR_BIT) */
	344	/*
	345	* The data dependency "read urcu_gp_ctr, write urcu_active_readers",
	346	* serializes those two memory operations. The memory barrier in the
	347	* signal handler ensures we receive the proper memory commit barriers
	348	* required by _STORE_SHARED and _LOAD_SHARED whenever communication
	349	* with the writer is needed.
	350	*/
	351	if (likely(!(tmp & RCU_GP_CTR_NEST_MASK)))
	352	_STORE_SHARED(urcu_active_readers, _LOAD_SHARED(urcu_gp_ctr));
	353	else
	354	_STORE_SHARED(urcu_active_readers, tmp + RCU_GP_COUNT);
	355	/*
	356	* Increment active readers count before accessing the pointer.
	357	* See force_mb_all_threads().
	358	*/
	359	reader_barrier();
	360	}
	361
	362	static inline void rcu_read_unlock(void)
	363	{
	364	reader_barrier();
	365	/*
	366	* Finish using rcu before decrementing the pointer.
	367	* See force_mb_all_threads().
	368	*/
	369	_STORE_SHARED(urcu_active_readers, urcu_active_readers - RCU_GP_COUNT);
	370	}
	371
	372	/**
	373	* rcu_assign_pointer - assign (publicize) a pointer to a newly
	374	* initialized structure that will be dereferenced by RCU read-side
	375	* critical sections. Returns the value assigned.
	376	*
	377	* Inserts memory barriers on architectures that require them
	378	* (pretty much all of them other than x86), and also prevents
	379	* the compiler from reordering the code that initializes the
	380	* structure after the pointer assignment. More importantly, this
	381	* call documents which pointers will be dereferenced by RCU read-side
	382	* code.
	383	*/
	384
	385	#define rcu_assign_pointer(p, v) \
	386	({ \
	387	if (!__builtin_constant_p(v) \|\| \
	388	((v) != NULL)) \
	389	wmb(); \
	390	STORE_SHARED(p, v); \
	391	})
	392
	393	#define rcu_xchg_pointer(p, v) \
	394	({ \
	395	if (!__builtin_constant_p(v) \|\| \
	396	((v) != NULL)) \
	397	wmb(); \
	398	xchg(p, v); \
	399	})
	400
	401	extern void synchronize_rcu(void);
	402
	403	/*
	404	* Exchanges the pointer and waits for quiescent state.
	405	* The pointer returned can be freed.
	406	*/
	407	#define urcu_publish_content(p, v) \
	408	({ \
	409	void *oldptr; \
	410	oldptr = rcu_xchg_pointer(p, v); \
	411	synchronize_rcu(); \
	412	oldptr; \
	413	})
	414
	415	/*
	416	* Reader thread registration.
	417	*/
	418	extern void urcu_register_thread(void);
	419	extern void urcu_unregister_thread(void);
	420
	421	#endif /* _URCU_H */