static int warned = 0;
call_rcu_lock(&call_rcu_mutex);
+ alloc_cpu_call_rcu_data();
if (cpu < 0 || maxcpus <= cpu) {
if (!warned) {
fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
warned = 1;
}
call_rcu_unlock(&call_rcu_mutex);
errno = EINVAL;
return -EINVAL;
}
- alloc_cpu_call_rcu_data();
- call_rcu_unlock(&call_rcu_mutex);
+
if (per_cpu_call_rcu_data == NULL) {
+ call_rcu_unlock(&call_rcu_mutex);
errno = ENOMEM;
return -ENOMEM;
}
+
+ if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
+ call_rcu_unlock(&call_rcu_mutex);
+ errno = EEXIST;
+ return -EEXIST;
+ }
+
per_cpu_call_rcu_data[cpu] = crdp;
+ call_rcu_unlock(&call_rcu_mutex);
return 0;
}
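
/*
 * Caller-side sketch (not part of this patch): under the reworked error
 * paths above, -EEXIST only means another thread installed a crdp for
 * this CPU first, so the loser frees its copy and carries on.  Assumes
 * the public create_call_rcu_data()/call_rcu_data_free() API;
 * install_cpu_worker() is a hypothetical name.
 */
#include <errno.h>
#include <urcu/call-rcu.h>

static int install_cpu_worker(int cpu)
{
        struct call_rcu_data *crdp;
        int ret;

        crdp = create_call_rcu_data(0, cpu);    /* flags, cpu_affinity */
        if (crdp == NULL)
                return -ENOMEM;
        ret = set_cpu_call_rcu_data(cpu, crdp);
        if (ret == -EEXIST) {
                call_rcu_data_free(crdp);       /* lost the race; not an error */
                ret = 0;
        }
        return ret;
}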
*/
struct call_rcu_data *get_call_rcu_data(void)
{
- int curcpu;
- static int warned = 0;
+ struct call_rcu_data *crd;
if (thread_call_rcu_data != NULL)
return thread_call_rcu_data;
- if (maxcpus <= 0)
- return get_default_call_rcu_data();
- curcpu = sched_getcpu();
- if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
- fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
- warned = 1;
+
+ if (maxcpus > 0) {
+ crd = get_cpu_call_rcu_data(sched_getcpu());
+ if (crd)
+ return crd;
}
- if (curcpu >= 0 && maxcpus > curcpu &&
- per_cpu_call_rcu_data != NULL &&
- per_cpu_call_rcu_data[curcpu] != NULL)
- return per_cpu_call_rcu_data[curcpu];
+
return get_default_call_rcu_data();
}
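
/*
 * Resolution-order sketch (not part of this patch): call_rcu() resolves
 * its queue through get_call_rcu_data(), so callers only pass the
 * callback and transparently get the per-thread crdp, then the per-CPU
 * one, then the default.  struct foo and its helpers are hypothetical.
 */
#include <stdlib.h>
#include <urcu/call-rcu.h>
#include <urcu/compiler.h>

struct foo {
        int value;
        struct rcu_head rcu;
};

static void free_foo(struct rcu_head *head)
{
        free(caa_container_of(head, struct foo, rcu));
}

static void retire_foo(struct foo *f)
{
        call_rcu(&f->rcu, free_foo);    /* thread -> CPU -> default crdp */
}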
}
call_rcu_unlock(&call_rcu_mutex);
if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
- /* FIXME: Leaks crdp for now. */
- return ret; /* Can happen on race. */
+ call_rcu_data_free(crdp);
+
+ /* it has been created by another thread */
+ if (ret == -EEXIST)
+ continue;
+
+ return ret;
}
}
return 0;
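
/*
 * Net effect (sketch, not part of the patch): two threads racing on the
 * same setup call can now both succeed; each per-CPU loser frees its
 * spare crdp and continues instead of leaking it and bailing out.
 *
 *      thread A: create_all_cpu_call_rcu_data(0);
 *      thread B: create_all_cpu_call_rcu_data(0);      both return 0
 */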
*cbs_endprev = cbs;
uatomic_add(&default_call_rcu_data->qlen,
uatomic_read(&crdp->qlen));
- cds_list_del(&crdp->list);
- free(crdp);
+ wake_call_rcu_thread(default_call_rcu_data);
}
+
+ cds_list_del(&crdp->list);
+ free(crdp);
}
/*
*/
void call_rcu_after_fork_child(void)
{
- struct call_rcu_data *crdp;
+ struct call_rcu_data *crdp, *next;
/* Release the mutex. */
call_rcu_unlock(&call_rcu_mutex);
(void)get_default_call_rcu_data();
/* Dispose of all of the rest of the call_rcu_data structures. */
- while (call_rcu_data_list.next != call_rcu_data_list.prev) {
- crdp = cds_list_entry(call_rcu_data_list.prev,
- struct call_rcu_data, list);
+ cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
if (crdp == default_call_rcu_data)
- crdp = cds_list_entry(crdp->list.prev,
- struct call_rcu_data, list);
+ continue;
uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
call_rcu_data_free(crdp);
}
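
/*
 * Wiring sketch (not part of this patch): an application using
 * call_rcu() across fork() is expected to register the three handlers,
 * e.g. via pthread_atfork(), so the child resumes with only the
 * freshly rebuilt default call_rcu_data from above.
 */
#include <pthread.h>
#include <urcu/call-rcu.h>

static void setup_call_rcu_fork_handlers(void)
{
        pthread_atfork(call_rcu_before_fork,
                       call_rcu_after_fork_parent,
                       call_rcu_after_fork_child);
}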
extern void *rcu_dereference_sym(void *p);
#define rcu_dereference(p) \
({ \
- typeof(p) _________p1 = \
- rcu_dereference_sym((void *)(p)); \
+ typeof(p) _________p1 = URCU_FORCE_CAST(typeof(p), \
+ rcu_dereference_sym(URCU_FORCE_CAST(void *, p))); \
(_________p1); \
})
extern void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new);
#define rcu_cmpxchg_pointer(p, old, _new) \
({ \
- typeof(*p) _________pold = (old); \
- typeof(*p) _________pnew = (_new); \
- typeof(*p) _________p1 = \
- rcu_cmpxchg_pointer_sym((void **)(p), _________pold, \
- _________pnew); \
+ typeof(*(p)) _________pold = (old); \
+ typeof(*(p)) _________pnew = (_new); \
+ typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+ rcu_cmpxchg_pointer_sym(URCU_FORCE_CAST(void **, p),\
+ _________pold, \
+ _________pnew)); \
(_________p1); \
})
extern void *rcu_xchg_pointer_sym(void **p, void *v);
#define rcu_xchg_pointer(p, v) \
({ \
- typeof(*p) _________pv = (v); \
- typeof(*p) _________p1 = \
- rcu_xchg_pointer_sym((void **)(p), _________pv); \
+ typeof(*(p)) _________pv = (v); \
+ typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+ rcu_xchg_pointer_sym(URCU_FORCE_CAST(void **, p), \
+ _________pv)); \
(_________p1); \
})
extern void *rcu_set_pointer_sym(void **p, void *v);
#define rcu_set_pointer(p, v) \
({ \
- typeof(*p) _________pv = (v); \
- typeof(*p) _________p1 = \
- rcu_set_pointer_sym((void **)(p), _________pv); \
+ typeof(*(p)) _________pv = (v); \
+ typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+ rcu_set_pointer_sym(URCU_FORCE_CAST(void **, p), \
+ _________pv)); \
})
#endif /* !_LGPL_SOURCE */
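
/*
 * Usage sketch (hypothetical types, not part of the patch): with
 * URCU_FORCE_CAST, the macros above compile as both C and C++ while
 * preserving the pointer's own type, so call sites need no casts.
 */
#include <urcu.h>

struct node { int key; };
static struct node *head;

static struct node *swap_head(struct node *new_head)
{
        /* old head comes back typed as struct node *, not void * */
        return rcu_xchg_pointer(&head, new_head);
}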
/* Include size of POWER5+ L3 cache lines: 256 bytes */
#define CAA_CACHE_LINE_SIZE 256
-#define cmm_mb() asm volatile("sync":::"memory")
+/*
+ * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
+ * preserve ordering of cacheable vs. non-cacheable accesses, so it
+ * should not be used to order with respect to MMIO operations. An
+ * eieio+lwsync pair is also not enough for cmm_rmb, because it will
+ * order cacheable and non-cacheable memory operations separately---i.e.
+ * not the latter against the former.
+ */
+#define cmm_mb() asm volatile("sync":::"memory")
+
+/*
+ * lwsync orders loads in cacheable memory with respect to other loads,
+ * and stores in cacheable memory with respect to other stores.
+ * Therefore, use it for barriers ordering accesses to cacheable memory
+ * only.
+ */
+#define cmm_smp_rmb() asm volatile("lwsync":::"memory")
+#define cmm_smp_wmb() asm volatile("lwsync":::"memory")
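
/*
 * Pairing sketch (hypothetical globals, not part of the patch): the
 * cacheable-only barriers above serve classic publish/subscribe pairs,
 * which need exactly the store/store and load/load ordering lwsync
 * provides.
 */
#include <urcu/system.h>

static int data;
static int ready;

static void publish(void)
{
        data = 42;
        cmm_smp_wmb();          /* order data store before flag store */
        CMM_STORE_SHARED(ready, 1);
}

static int subscribe(void)
{
        if (CMM_LOAD_SHARED(ready)) {
                cmm_smp_rmb();  /* order flag load before data load */
                return data;
        }
        return -1;
}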
#define mftbl() \
({ \
*/
#define __rcu
+#ifdef __cplusplus
+#define URCU_FORCE_CAST(type, arg) (reinterpret_cast<type>(arg))
+#else
+#define URCU_FORCE_CAST(type, arg) ((type) (arg))
+#endif
+
#endif /* _URCU_COMPILER_H */
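
/*
 * Expansion sketch (not part of the patch): C++ has no implicit
 * void * -> T * conversion, so the _sym wrappers' return values must be
 * cast back explicitly, and reinterpret_cast also covers the
 * T ** -> void ** argument casts that static_cast would reject:
 *
 *      URCU_FORCE_CAST(void **, p)
 *        C:   ((void **) (p))
 *        C++: (reinterpret_cast<void **>(p))
 */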
#define ILLEGAL_INSTR ".long 0xd00d00"
/*
- * Using a isync as second barrier for exchange to provide acquire semantic.
- * According to uatomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly
- * explicit that this also has acquire semantics."
- * Derived from AO_compare_and_swap(), but removed the comparison.
+ * Providing sequential consistency semantic with respect to other
+ * instructions for cmpxchg and add_return family of atomic primitives.
+ *
+ * This is achieved with:
+ * lwsync (prior stores can be reordered after following loads)
+ * lwarx
+ * stwcx.
+ * test if success (retry)
+ * sync
+ *
+ * Explanation of the sequential consistency provided by this scheme
+ * from Paul E. McKenney:
+ *
+ * The reason we can get away with the lwsync before is that if a prior
+ * store reorders with the lwarx, then you have to store to the atomic
+ * variable from some other CPU to detect it.
+ *
+ * And if you do that, the lwarx will lose its reservation, so the stwcx
+ * will fail. The atomic operation will retry, so that the caller won't be
+ * able to see the misordering.
*/
/* xchg */
"1:\t" "lwarx %0,0,%1\n" /* load and reserve */
"stwcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"1:\t" "ldarx %0,0,%1\n" /* load and reserve */
"stdcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"bne 2f\n" /* old, fail */
"stwcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
"2:\n"
: "=&r"(old_val)
: "r"(addr), "r"((unsigned int)_new),
"bne 2f\n" /* old, fail */
"stdcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
"2:\n"
: "=&r"(old_val)
: "r"(addr), "r"((unsigned long)_new),
"add %0,%2,%0\n" /* add val to value loaded */
"stwcx. %0,0,%1\n" /* store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"add %0,%2,%0\n" /* add val to value loaded */
"stdcx. %0,0,%1\n" /* store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");