static int warned = 0;
call_rcu_lock(&call_rcu_mutex);
+ alloc_cpu_call_rcu_data();
if (cpu < 0 || maxcpus <= cpu) {
if (!warned) {
fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
warned = 1;
}
call_rcu_unlock(&call_rcu_mutex);
errno = EINVAL;
return -EINVAL;
}
- alloc_cpu_call_rcu_data();
- call_rcu_unlock(&call_rcu_mutex);
+
if (per_cpu_call_rcu_data == NULL) {
+ call_rcu_unlock(&call_rcu_mutex);
errno = ENOMEM;
return -ENOMEM;
}
+
+ if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
+ call_rcu_unlock(&call_rcu_mutex);
+ errno = EEXIST;
+ return -EEXIST;
+ }
+
per_cpu_call_rcu_data[cpu] = crdp;
+ call_rcu_unlock(&call_rcu_mutex);
return 0;
}
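
/*
 * Caller-side sketch (not part of this patch): under the reworked error
 * paths above, -EEXIST only means another thread installed a crdp for
 * this CPU first, so the loser frees its copy and carries on.  Assumes
 * the public create_call_rcu_data()/call_rcu_data_free() API;
 * install_cpu_worker() is a hypothetical name.
 */
#include <errno.h>
#include <urcu/call-rcu.h>

static int install_cpu_worker(int cpu)
{
        struct call_rcu_data *crdp;
        int ret;

        crdp = create_call_rcu_data(0, cpu);    /* flags, cpu_affinity */
        if (crdp == NULL)
                return -ENOMEM;
        ret = set_cpu_call_rcu_data(cpu, crdp);
        if (ret == -EEXIST) {
                call_rcu_data_free(crdp);       /* lost the race; not an error */
                ret = 0;
        }
        return ret;
}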
*/
struct call_rcu_data *get_call_rcu_data(void)
{
- int curcpu;
- static int warned = 0;
+ struct call_rcu_data *crd;
if (thread_call_rcu_data != NULL)
return thread_call_rcu_data;
- if (maxcpus <= 0)
- return get_default_call_rcu_data();
- curcpu = sched_getcpu();
- if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
- fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
- warned = 1;
+
+ if (maxcpus > 0) {
+ crd = get_cpu_call_rcu_data(sched_getcpu());
+ if (crd)
+ return crd;
}
- if (curcpu >= 0 && maxcpus > curcpu &&
- per_cpu_call_rcu_data != NULL &&
- per_cpu_call_rcu_data[curcpu] != NULL)
- return per_cpu_call_rcu_data[curcpu];
+
return get_default_call_rcu_data();
}
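
/*
 * Resolution-order sketch (not part of this patch): call_rcu() resolves
 * its queue through get_call_rcu_data(), so callers only pass the
 * callback and transparently get the per-thread crdp, then the per-CPU
 * one, then the default.  struct foo and its helpers are hypothetical.
 */
#include <stdlib.h>
#include <urcu/call-rcu.h>
#include <urcu/compiler.h>

struct foo {
        int value;
        struct rcu_head rcu;
};

static void free_foo(struct rcu_head *head)
{
        free(caa_container_of(head, struct foo, rcu));
}

static void retire_foo(struct foo *f)
{
        call_rcu(&f->rcu, free_foo);    /* thread -> CPU -> default crdp */
}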
}
call_rcu_unlock(&call_rcu_mutex);
if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
- /* FIXME: Leaks crdp for now. */
- return ret; /* Can happen on race. */
+ call_rcu_data_free(crdp);
+
+ /* it has been created by another thread */
+ if (ret == -EEXIST)
+ continue;
+
+ return ret;
}
}
return 0;
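
/*
 * Net effect (sketch, not part of the patch): two threads racing on the
 * same setup call can now both succeed; each per-CPU loser frees its
 * spare crdp and continues instead of leaking it and bailing out.
 *
 *      thread A: create_all_cpu_call_rcu_data(0);
 *      thread B: create_all_cpu_call_rcu_data(0);      both return 0
 */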
*cbs_endprev = cbs;
uatomic_add(&default_call_rcu_data->qlen,
uatomic_read(&crdp->qlen));
- cds_list_del(&crdp->list);
- free(crdp);
+ wake_call_rcu_thread(default_call_rcu_data);
}
+
+ cds_list_del(&crdp->list);
+ free(crdp);
}
/*
*/
void call_rcu_after_fork_child(void)
{
- struct call_rcu_data *crdp;
+ struct call_rcu_data *crdp, *next;
/* Release the mutex. */
call_rcu_unlock(&call_rcu_mutex);
(void)get_default_call_rcu_data();
/* Dispose of all of the rest of the call_rcu_data structures. */
- while (call_rcu_data_list.next != call_rcu_data_list.prev) {
- crdp = cds_list_entry(call_rcu_data_list.prev,
- struct call_rcu_data, list);
+ cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
if (crdp == default_call_rcu_data)
- crdp = cds_list_entry(crdp->list.prev,
- struct call_rcu_data, list);
+ continue;
uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
call_rcu_data_free(crdp);
}
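
/*
 * Wiring sketch (not part of this patch): an application using
 * call_rcu() across fork() is expected to register the three handlers,
 * e.g. via pthread_atfork(), so the child resumes with only the
 * freshly rebuilt default call_rcu_data from above.
 */
#include <pthread.h>
#include <urcu/call-rcu.h>

static void setup_call_rcu_fork_handlers(void)
{
        pthread_atfork(call_rcu_before_fork,
                       call_rcu_after_fork_parent,
                       call_rcu_after_fork_child);
}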
extern void *rcu_dereference_sym(void *p);
#define rcu_dereference(p) \
({ \
- typeof(p) _________p1 = \
- rcu_dereference_sym((void *)(p)); \
+ typeof(p) _________p1 = URCU_FORCE_CAST(typeof(p), \
+ rcu_dereference_sym(URCU_FORCE_CAST(void *, p))); \
(_________p1); \
})
extern void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new);
#define rcu_cmpxchg_pointer(p, old, _new) \
({ \
- typeof(*p) _________pold = (old); \
- typeof(*p) _________pnew = (_new); \
- typeof(*p) _________p1 = \
- rcu_cmpxchg_pointer_sym((void **)(p), _________pold, \
- _________pnew); \
+ typeof(*(p)) _________pold = (old); \
+ typeof(*(p)) _________pnew = (_new); \
+ typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+ rcu_cmpxchg_pointer_sym(URCU_FORCE_CAST(void **, p),\
+ _________pold, \
+ _________pnew)); \
(_________p1); \
})
extern void *rcu_xchg_pointer_sym(void **p, void *v);
#define rcu_xchg_pointer(p, v) \
({ \
- typeof(*p) _________pv = (v); \
- typeof(*p) _________p1 = \
- rcu_xchg_pointer_sym((void **)(p), _________pv); \
+ typeof(*(p)) _________pv = (v); \
+ typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+ rcu_xchg_pointer_sym(URCU_FORCE_CAST(void **, p), \
+ _________pv)); \
(_________p1); \
})
extern void *rcu_set_pointer_sym(void **p, void *v);
#define rcu_set_pointer(p, v) \
({ \
- typeof(*p) _________pv = (v); \
- typeof(*p) _________p1 = \
- rcu_set_pointer_sym((void **)(p), _________pv); \
+ typeof(*(p)) _________pv = (v); \
+ typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+ rcu_set_pointer_sym(URCU_FORCE_CAST(void **, p), \
+ _________pv)); \
})
#endif /* !_LGPL_SOURCE */
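
/*
 * Usage sketch (hypothetical types, not part of the patch): with
 * URCU_FORCE_CAST, the macros above compile as both C and C++ while
 * preserving the pointer's own type, so call sites need no casts.
 */
#include <urcu.h>

struct node { int key; };
static struct node *head;

static struct node *swap_head(struct node *new_head)
{
        /* old head comes back typed as struct node *, not void * */
        return rcu_xchg_pointer(&head, new_head);
}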
/* Include size of POWER5+ L3 cache lines: 256 bytes */
#define CAA_CACHE_LINE_SIZE 256
-#define cmm_mb() asm volatile("sync":::"memory")
+/*
+ * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
+ * preserve ordering of cacheable vs. non-cacheable accesses, so it
+ * should not be used to order with respect to MMIO operations. An
+ * eieio+lwsync pair is also not enough for cmm_rmb, because it will
+ * order cacheable and non-cacheable memory operations separately---i.e.
+ * not the latter against the former.
+ */
+#define cmm_mb() asm volatile("sync":::"memory")
+
+/*
+ * lwsync orders loads in cacheable memory with respect to other loads,
+ * and stores in cacheable memory with respect to other stores.
+ * Therefore, use it for barriers ordering accesses to cacheable memory
+ * only.
+ */
+#define cmm_smp_rmb() asm volatile("lwsync":::"memory")
+#define cmm_smp_wmb() asm volatile("lwsync":::"memory")
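
/*
 * Pairing sketch (hypothetical globals, not part of the patch): the
 * cacheable-only barriers above serve classic publish/subscribe pairs,
 * which need exactly the store/store and load/load ordering lwsync
 * provides.
 */
#include <urcu/system.h>

static int data;
static int ready;

static void publish(void)
{
        data = 42;
        cmm_smp_wmb();          /* order data store before flag store */
        CMM_STORE_SHARED(ready, 1);
}

static int subscribe(void)
{
        if (CMM_LOAD_SHARED(ready)) {
                cmm_smp_rmb();  /* order flag load before data load */
                return data;
        }
        return -1;
}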
#define mftbl() \
({ \
*/
#define __rcu
+#ifdef __cplusplus
+#define URCU_FORCE_CAST(type, arg) (reinterpret_cast<type>(arg))
+#else
+#define URCU_FORCE_CAST(type, arg) ((type) (arg))
+#endif
+
#endif /* _URCU_COMPILER_H */
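
/*
 * Expansion sketch (not part of the patch): C++ has no implicit
 * void * -> T * conversion, so the _sym wrappers' return values must be
 * cast back explicitly, and reinterpret_cast also covers the
 * T ** -> void ** argument casts that static_cast would reject:
 *
 *      URCU_FORCE_CAST(void **, p)
 *        C:   ((void **) (p))
 *        C++: (reinterpret_cast<void **>(p))
 */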
#define ILLEGAL_INSTR ".long 0xd00d00"
/*
- * Using a isync as second barrier for exchange to provide acquire semantic.
- * According to uatomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly
- * explicit that this also has acquire semantics."
- * Derived from AO_compare_and_swap(), but removed the comparison.
+ * Providing sequential consistency semantic with respect to other
+ * instructions for cmpxchg and add_return family of atomic primitives.
+ *
+ * This is achieved with:
+ * lwsync (prior stores can be reordered after following loads)
+ * lwarx
+ * stwcx.
+ * test if success (retry)
+ * sync
+ *
+ * Explanation of the sequential consistency provided by this scheme
+ * from Paul E. McKenney:
+ *
+ * The reason we can get away with the lwsync before is that if a prior
+ * store reorders with the lwarx, then you have to store to the atomic
+ * variable from some other CPU to detect it.
+ *
+ * And if you do that, the lwarx will lose its reservation, so the stwcx
+ * will fail. The atomic operation will retry, so that the caller won't be
+ * able to see the misordering.
*/
/* xchg */
"1:\t" "lwarx %0,0,%1\n" /* load and reserve */
"stwcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"1:\t" "ldarx %0,0,%1\n" /* load and reserve */
"stdcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"bne 2f\n" /* old, fail */
"stwcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
"2:\n"
: "=&r"(old_val)
: "r"(addr), "r"((unsigned int)_new),
"bne 2f\n" /* old, fail */
"stdcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
"2:\n"
: "=&r"(old_val)
: "r"(addr), "r"((unsigned long)_new),
"add %0,%2,%0\n" /* add val to value loaded */
"stwcx. %0,0,%1\n" /* store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"add %0,%2,%0\n" /* add val to value loaded */
"stdcx. %0,0,%1\n" /* store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");