From: Mathieu Desnoyers
Date: Thu, 22 Sep 2011 14:50:04 +0000 (-0400)
Subject: Merge branch 'master' into urcu/ht-shrink
X-Git-Tag: v0.7.0~43^2~124
X-Git-Url: https://git.liburcu.org/?a=commitdiff_plain;h=8c35d699cf442f91fbba3c99beaa41a083ef7bff;hp=9357c41599e239897db0cc18e1fbaecd1065ebc0;p=urcu.git

Merge branch 'master' into urcu/ht-shrink
---

diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h
index 700d128..f9250e8 100644
--- a/urcu-call-rcu-impl.h
+++ b/urcu-call-rcu-impl.h
@@ -386,6 +386,7 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 	static int warned = 0;
 
 	call_rcu_lock(&call_rcu_mutex);
+	alloc_cpu_call_rcu_data();
 	if (cpu < 0 || maxcpus <= cpu) {
 		if (!warned) {
 			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
@@ -395,13 +396,21 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 		errno = EINVAL;
 		return -EINVAL;
 	}
-	alloc_cpu_call_rcu_data();
-	call_rcu_unlock(&call_rcu_mutex);
+
 	if (per_cpu_call_rcu_data == NULL) {
+		call_rcu_unlock(&call_rcu_mutex);
 		errno = ENOMEM;
 		return -ENOMEM;
 	}
+
+	if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
+		call_rcu_unlock(&call_rcu_mutex);
+		errno = EEXIST;
+		return -EEXIST;
+	}
+
 	per_cpu_call_rcu_data[cpu] = crdp;
+	call_rcu_unlock(&call_rcu_mutex);
 	return 0;
 }
 
@@ -435,22 +444,17 @@ struct call_rcu_data *get_default_call_rcu_data(void)
  */
 struct call_rcu_data *get_call_rcu_data(void)
 {
-	int curcpu;
-	static int warned = 0;
+	struct call_rcu_data *crd;
 
 	if (thread_call_rcu_data != NULL)
 		return thread_call_rcu_data;
-	if (maxcpus <= 0)
-		return get_default_call_rcu_data();
-	curcpu = sched_getcpu();
-	if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
-		fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
-		warned = 1;
+
+	if (maxcpus > 0) {
+		crd = get_cpu_call_rcu_data(sched_getcpu());
+		if (crd)
+			return crd;
 	}
-	if (curcpu >= 0 && maxcpus > curcpu &&
-	    per_cpu_call_rcu_data != NULL &&
-	    per_cpu_call_rcu_data[curcpu] != NULL)
-		return per_cpu_call_rcu_data[curcpu];
+
 	return get_default_call_rcu_data();
 }
 
@@ -518,8 +522,13 @@ int create_all_cpu_call_rcu_data(unsigned long flags)
 		}
 		call_rcu_unlock(&call_rcu_mutex);
 		if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
-			/* FIXME: Leaks crdp for now. */
-			return ret; /* Can happen on race. */
+			call_rcu_data_free(crdp);
+
+			/* it has been created by other thread */
+			if (ret == -EEXIST)
+				continue;
+
+			return ret;
 		}
 	}
 	return 0;
 }
@@ -605,9 +614,11 @@ void call_rcu_data_free(struct call_rcu_data *crdp)
 		*cbs_endprev = cbs;
 		uatomic_add(&default_call_rcu_data->qlen,
 			    uatomic_read(&crdp->qlen));
-		cds_list_del(&crdp->list);
-		free(crdp);
+		wake_call_rcu_thread(default_call_rcu_data);
 	}
+
+	cds_list_del(&crdp->list);
+	free(crdp);
 }
 
 /*
@@ -656,7 +667,7 @@ void call_rcu_after_fork_parent(void)
  */
 void call_rcu_after_fork_child(void)
 {
-	struct call_rcu_data *crdp;
+	struct call_rcu_data *crdp, *next;
 
 	/* Release the mutex. */
 	call_rcu_unlock(&call_rcu_mutex);
@@ -669,12 +680,9 @@ void call_rcu_after_fork_child(void)
 	(void)get_default_call_rcu_data();
 
 	/* Dispose of all of the rest of the call_rcu_data structures. */
-	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
-		crdp = cds_list_entry(call_rcu_data_list.prev,
-				      struct call_rcu_data, list);
+	cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 		if (crdp == default_call_rcu_data)
-			crdp = cds_list_entry(crdp->list.prev,
-					      struct call_rcu_data, list);
+			continue;
 		uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 		call_rcu_data_free(crdp);
 	}
diff --git a/urcu-pointer.h b/urcu-pointer.h
index 027a18f..67ee381 100644
--- a/urcu-pointer.h
+++ b/urcu-pointer.h
@@ -67,37 +67,40 @@ extern "C" {
 extern void *rcu_dereference_sym(void *p);
 #define rcu_dereference(p)					\
 	({							\
-		typeof(p) _________p1 =				\
-			rcu_dereference_sym((void *)(p));	\
+		typeof(p) _________p1 = URCU_FORCE_CAST(typeof(p),	\
+			rcu_dereference_sym(URCU_FORCE_CAST(void *, p))); \
 		(_________p1);					\
 	})
 
 extern void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new);
 #define rcu_cmpxchg_pointer(p, old, _new)			\
 	({							\
-		typeof(*p) _________pold = (old);		\
-		typeof(*p) _________pnew = (_new);		\
-		typeof(*p) _________p1 =			\
-			rcu_cmpxchg_pointer_sym((void **)(p), _________pold, \
-						_________pnew);	\
+		typeof(*(p)) _________pold = (old);		\
+		typeof(*(p)) _________pnew = (_new);		\
+		typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+			rcu_cmpxchg_pointer_sym(URCU_FORCE_CAST(void **, p),\
+						_________pold,	\
+						_________pnew)); \
 		(_________p1);					\
 	})
 
 extern void *rcu_xchg_pointer_sym(void **p, void *v);
 #define rcu_xchg_pointer(p, v)					\
 	({							\
-		typeof(*p) _________pv = (v);			\
-		typeof(*p) _________p1 =			\
-			rcu_xchg_pointer_sym((void **)(p), _________pv); \
+		typeof(*(p)) _________pv = (v);			\
+		typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+			rcu_xchg_pointer_sym(URCU_FORCE_CAST(void **, p), \
+					     _________pv));	\
 		(_________p1);					\
 	})
 
 extern void *rcu_set_pointer_sym(void **p, void *v);
 #define rcu_set_pointer(p, v)					\
 	({							\
-		typeof(*p) _________pv = (v);			\
-		typeof(*p) _________p1 =			\
-			rcu_set_pointer_sym((void **)(p), _________pv); \
+		typeof(*(p)) _________pv = (v);			\
+		typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+			rcu_set_pointer_sym(URCU_FORCE_CAST(void **, p), \
+					    _________pv));	\
 	})
 
 #endif /* !_LGPL_SOURCE */
diff --git a/urcu/arch/ppc.h b/urcu/arch/ppc.h
index a03d688..048b217 100644
--- a/urcu/arch/ppc.h
+++ b/urcu/arch/ppc.h
@@ -32,7 +32,24 @@ extern "C" {
 /* Include size of POWER5+ L3 cache lines: 256 bytes */
 #define CAA_CACHE_LINE_SIZE	256
 
-#define cmm_mb()    asm volatile("sync":::"memory")
+/*
+ * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
+ * preserve ordering of cacheable vs. non-cacheable accesses, so it
+ * should not be used to order with respect to MMIO operations. An
+ * eieio+lwsync pair is also not enough for cmm_rmb, because it will
+ * order cacheable and non-cacheable memory operations separately---i.e.
+ * not the latter against the former.
+ */
+#define cmm_mb()         asm volatile("sync":::"memory")
+
+/*
+ * lwsync orders loads in cacheable memory with respect to other loads,
+ * and stores in cacheable memory with respect to other stores.
+ * Therefore, use it for barriers ordering accesses to cacheable memory
+ * only.
+ */
+#define cmm_smp_rmb()    asm volatile("lwsync":::"memory")
+#define cmm_smp_wmb()    asm volatile("lwsync":::"memory")
 
 #define mftbl()						\
 	({						\
diff --git a/urcu/compiler.h b/urcu/compiler.h
index 4bced2a..6db803e 100644
--- a/urcu/compiler.h
+++ b/urcu/compiler.h
@@ -79,4 +79,10 @@
  */
 #define __rcu
 
+#ifdef __cplusplus
+#define URCU_FORCE_CAST(type, arg)	(reinterpret_cast<type>(arg))
+#else
+#define URCU_FORCE_CAST(type, arg)	((type) (arg))
+#endif
+
 #endif /* _URCU_COMPILER_H */
diff --git a/urcu/uatomic/ppc.h b/urcu/uatomic/ppc.h
index 3eb3d63..16dbd0c 100644
--- a/urcu/uatomic/ppc.h
+++ b/urcu/uatomic/ppc.h
@@ -36,10 +36,26 @@ extern "C" {
 #define ILLEGAL_INSTR	".long	0xd00d00"
 
 /*
- * Using a isync as second barrier for exchange to provide acquire semantic.
- * According to uatomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly
- * explicit that this also has acquire semantics."
- * Derived from AO_compare_and_swap(), but removed the comparison.
+ * Providing sequential consistency semantic with respect to other
+ * instructions for cmpxchg and add_return family of atomic primitives.
+ *
+ * This is achieved with:
+ *   lwsync (prior loads can be reordered after following load)
+ *   lwarx
+ *   stwcx.
+ *   test if success (retry)
+ *   sync
+ *
+ * Explanation of the sequential consistency provided by this scheme
+ * from Paul E. McKenney:
+ *
+ * The reason we can get away with the lwsync before is that if a prior
+ * store reorders with the lwarx, then you have to store to the atomic
+ * variable from some other CPU to detect it.
+ *
+ * And if you do that, the lwarx will lose its reservation, so the stwcx
+ * will fail. The atomic operation will retry, so that the caller won't be
+ * able to see the misordering.
  */
 
 /* xchg */
@@ -57,7 +73,7 @@ unsigned long _uatomic_exchange(void *addr, unsigned long val, int len)
 		"1:\t"	"lwarx %0,0,%1\n"	/* load and reserve */
 			"stwcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
@@ -74,7 +90,7 @@ unsigned long _uatomic_exchange(void *addr, unsigned long val, int len)
 		"1:\t"	"ldarx %0,0,%1\n"	/* load and reserve */
 			"stdcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
@@ -110,7 +126,7 @@ unsigned long _uatomic_cmpxchg(void *addr, unsigned long old,
 			"bne 2f\n"		/* old, fail */
 			"stwcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 		"2:\n"
 			: "=&r"(old_val)
 			: "r"(addr), "r"((unsigned int)_new),
@@ -131,7 +147,7 @@ unsigned long _uatomic_cmpxchg(void *addr, unsigned long old,
 			"bne 2f\n"		/* old, fail */
 			"stdcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 		"2:\n"
 			: "=&r"(old_val)
 			: "r"(addr), "r"((unsigned long)_new),
@@ -171,7 +187,7 @@ unsigned long _uatomic_add_return(void *addr, unsigned long val,
 			"add %0,%2,%0\n"	/* add val to value loaded */
 			"stwcx. %0,0,%1\n"	/* store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
@@ -189,7 +205,7 @@ unsigned long _uatomic_add_return(void *addr, unsigned long val,
 			"add %0,%2,%0\n"	/* add val to value loaded */
 			"stdcx. %0,0,%1\n"	/* store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
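
For context on the call_rcu changes above, here is a minimal usage sketch, not part of the commit. It assumes the default urcu flavour, linking with -lurcu, and that <urcu.h> pulls in the call_rcu() declarations (include urcu-call-rcu.h explicitly otherwise); the struct foo / free_foo names are illustrative. It shows where create_all_cpu_call_rcu_data() and the new -EEXIST handling in set_cpu_call_rcu_data() become visible to an application.

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>		/* default flavour: call_rcu(), rcu_register_thread() */

struct foo {
	int value;
	struct rcu_head rcu;	/* embedded head handed to call_rcu() */
};

static void free_foo(struct rcu_head *head)
{
	/* Runs from a call_rcu worker thread after a grace period. */
	free(caa_container_of(head, struct foo, rcu));
}

int main(void)
{
	struct foo *p;

	rcu_register_thread();

	/*
	 * Optional: one call_rcu worker per CPU. With the fix above,
	 * losing the creation race inside create_all_cpu_call_rcu_data()
	 * frees the extra call_rcu_data and continues (-EEXIST) instead
	 * of leaking it and returning an error.
	 */
	if (create_all_cpu_call_rcu_data(0))
		fprintf(stderr, "per-CPU call_rcu workers unavailable, "
				"using the default worker\n");

	p = malloc(sizeof(*p));
	if (!p)
		abort();
	p->value = 42;

	/* Defer reclamation of p until pre-existing readers are done. */
	call_rcu(&p->rcu, free_foo);

	/* In a long-running program the worker reclaims p later on. */
	rcu_unregister_thread();
	return 0;
}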
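
A second sketch, also not from the commit, exercises the non-_LGPL_SOURCE pointer macros patched in urcu-pointer.h above; the config structure and function names are hypothetical. It shows the publication/read pattern that rcu_xchg_pointer() and rcu_dereference() (now wrapped in URCU_FORCE_CAST so the header also compiles as C++) are meant to support.

#include <stdlib.h>
#include <urcu.h>
#include <urcu-pointer.h>

struct config {
	int timeout;
};

static struct config *cfg;	/* RCU-protected pointer */

static void update_config(int timeout)
{
	struct config *newp, *oldp;

	newp = malloc(sizeof(*newp));
	if (!newp)
		abort();
	newp->timeout = timeout;

	/* Publish the new version; readers see either the old or the new one. */
	oldp = rcu_xchg_pointer(&cfg, newp);

	synchronize_rcu();	/* wait for readers still using the old version */
	free(oldp);
}

static int read_timeout(void)
{
	struct config *p;
	int ret = -1;

	rcu_read_lock();
	p = rcu_dereference(cfg);	/* pairs with the publication above */
	if (p)
		ret = p->timeout;
	rcu_read_unlock();
	return ret;
}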
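
The lwsync-based cmm_smp_rmb()/cmm_smp_wmb() barriers introduced for ppc above order accesses to cacheable memory only; MMIO ordering still needs the sync-based cmm_mb(), as the comment in the patch explains. The sketch below, with an illustrative payload/ready protocol, shows the message-passing pattern those SMP barriers are meant to cover.

#include <urcu/arch.h>		/* cmm_smp_rmb(), cmm_smp_wmb() */
#include <urcu/system.h>	/* CMM_LOAD_SHARED(), CMM_STORE_SHARED() */

static int payload;
static int ready;

/* Producer: make the payload visible before the flag. */
static void publish(int v)
{
	CMM_STORE_SHARED(payload, v);
	cmm_smp_wmb();		/* order payload store before flag store */
	CMM_STORE_SHARED(ready, 1);
}

/* Consumer: seeing the flag set guarantees seeing the payload. */
static int try_consume(int *v)
{
	if (!CMM_LOAD_SHARED(ready))
		return 0;
	cmm_smp_rmb();		/* order flag load before payload load */
	*v = CMM_LOAD_SHARED(payload);
	return 1;
}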
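
Finally, a small sketch of the uatomic primitives whose ppc implementation now ends in sync rather than isync; the reference-count and ownership helpers are hypothetical and only show call sites that rely on the stronger ordering the patch comment describes.

#include <urcu/uatomic.h>

static unsigned long refcount = 1;

static unsigned long get_ref(void)
{
	/* Returns the incremented value; a full barrier on ppc with this patch. */
	return uatomic_add_return(&refcount, 1);
}

static int put_ref(void)
{
	/* Non-zero when the last reference has been dropped. */
	return uatomic_add_return(&refcount, -1) == 0;
}

static int try_claim(unsigned long *owner, unsigned long self)
{
	/* Atomically claim ownership iff currently unowned (0). */
	return uatomic_cmpxchg(owner, 0, self) == 0;
}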