From: Mathieu Desnoyers
Date: Thu, 22 Sep 2011 14:50:04 +0000 (-0400)
Subject: Merge branch 'master' into urcu/ht-shrink
X-Git-Tag: v0.7.0~43^2~124
X-Git-Url: https://git.liburcu.org/?a=commitdiff_plain;h=8c35d699cf442f91fbba3c99beaa41a083ef7bff;hp=9357c41599e239897db0cc18e1fbaecd1065ebc0;p=urcu.git

Merge branch 'master' into urcu/ht-shrink
---

diff --git a/urcu-call-rcu-impl.h b/urcu-call-rcu-impl.h
index 700d128..f9250e8 100644
--- a/urcu-call-rcu-impl.h
+++ b/urcu-call-rcu-impl.h
@@ -386,6 +386,7 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 	static int warned = 0;
 
 	call_rcu_lock(&call_rcu_mutex);
+	alloc_cpu_call_rcu_data();
 	if (cpu < 0 || maxcpus <= cpu) {
 		if (!warned) {
 			fprintf(stderr, "[error] liburcu: set CPU # out of range\n");
@@ -395,13 +396,21 @@ int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp)
 		errno = EINVAL;
 		return -EINVAL;
 	}
-	alloc_cpu_call_rcu_data();
-	call_rcu_unlock(&call_rcu_mutex);
+
 	if (per_cpu_call_rcu_data == NULL) {
+		call_rcu_unlock(&call_rcu_mutex);
 		errno = ENOMEM;
 		return -ENOMEM;
 	}
+
+	if (per_cpu_call_rcu_data[cpu] != NULL && crdp != NULL) {
+		call_rcu_unlock(&call_rcu_mutex);
+		errno = EEXIST;
+		return -EEXIST;
+	}
+
 	per_cpu_call_rcu_data[cpu] = crdp;
+	call_rcu_unlock(&call_rcu_mutex);
 	return 0;
 }
 
@@ -435,22 +444,17 @@ struct call_rcu_data *get_default_call_rcu_data(void)
  */
 struct call_rcu_data *get_call_rcu_data(void)
 {
-	int curcpu;
-	static int warned = 0;
+	struct call_rcu_data *crd;
 
 	if (thread_call_rcu_data != NULL)
 		return thread_call_rcu_data;
-	if (maxcpus <= 0)
-		return get_default_call_rcu_data();
-	curcpu = sched_getcpu();
-	if (!warned && (curcpu < 0 || maxcpus <= curcpu)) {
-		fprintf(stderr, "[error] liburcu: gcrd CPU # out of range\n");
-		warned = 1;
+
+	if (maxcpus > 0) {
+		crd = get_cpu_call_rcu_data(sched_getcpu());
+		if (crd)
+			return crd;
 	}
-	if (curcpu >= 0 && maxcpus > curcpu &&
-	    per_cpu_call_rcu_data != NULL &&
-	    per_cpu_call_rcu_data[curcpu] != NULL)
-		return per_cpu_call_rcu_data[curcpu];
+
 	return get_default_call_rcu_data();
 }
 
@@ -518,8 +522,13 @@ int create_all_cpu_call_rcu_data(unsigned long flags)
 		}
 		call_rcu_unlock(&call_rcu_mutex);
 		if ((ret = set_cpu_call_rcu_data(i, crdp)) != 0) {
-			/* FIXME: Leaks crdp for now. */
-			return ret; /* Can happen on race. */
+			call_rcu_data_free(crdp);
+
+			/* it has been created by other thread */
+			if (ret == -EEXIST)
+				continue;
+
+			return ret;
 		}
 	}
 	return 0;
 }
@@ -605,9 +614,11 @@ void call_rcu_data_free(struct call_rcu_data *crdp)
 		*cbs_endprev = cbs;
 		uatomic_add(&default_call_rcu_data->qlen,
 			    uatomic_read(&crdp->qlen));
-		cds_list_del(&crdp->list);
-		free(crdp);
+		wake_call_rcu_thread(default_call_rcu_data);
 	}
+
+	cds_list_del(&crdp->list);
+	free(crdp);
 }
 
 /*
@@ -656,7 +667,7 @@ void call_rcu_after_fork_parent(void)
  */
 void call_rcu_after_fork_child(void)
 {
-	struct call_rcu_data *crdp;
+	struct call_rcu_data *crdp, *next;
 
 	/* Release the mutex. */
 	call_rcu_unlock(&call_rcu_mutex);
@@ -669,12 +680,9 @@ void call_rcu_after_fork_child(void)
 	(void)get_default_call_rcu_data();
 
 	/* Dispose of all of the rest of the call_rcu_data structures. */
-	while (call_rcu_data_list.next != call_rcu_data_list.prev) {
-		crdp = cds_list_entry(call_rcu_data_list.prev,
-				      struct call_rcu_data, list);
+	cds_list_for_each_entry_safe(crdp, next, &call_rcu_data_list, list) {
 		if (crdp == default_call_rcu_data)
-			crdp = cds_list_entry(crdp->list.prev,
-					      struct call_rcu_data, list);
+			continue;
 		uatomic_set(&crdp->flags, URCU_CALL_RCU_STOPPED);
 		call_rcu_data_free(crdp);
 	}
diff --git a/urcu-pointer.h b/urcu-pointer.h
index 027a18f..67ee381 100644
--- a/urcu-pointer.h
+++ b/urcu-pointer.h
@@ -67,37 +67,40 @@ extern "C" {
 extern void *rcu_dereference_sym(void *p);
 #define rcu_dereference(p)					\
 	({							\
-		typeof(p) _________p1 =				\
-			rcu_dereference_sym((void *)(p));	\
+		typeof(p) _________p1 = URCU_FORCE_CAST(typeof(p),	\
+			rcu_dereference_sym(URCU_FORCE_CAST(void *, p))); \
 		(_________p1);					\
 	})
 
 extern void *rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new);
 #define rcu_cmpxchg_pointer(p, old, _new)			\
 	({							\
-		typeof(*p) _________pold = (old);		\
-		typeof(*p) _________pnew = (_new);		\
-		typeof(*p) _________p1 =			\
-			rcu_cmpxchg_pointer_sym((void **)(p), _________pold, \
-						_________pnew);	\
+		typeof(*(p)) _________pold = (old);		\
+		typeof(*(p)) _________pnew = (_new);		\
+		typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+			rcu_cmpxchg_pointer_sym(URCU_FORCE_CAST(void **, p),\
+						_________pold,	\
+						_________pnew)); \
 		(_________p1);					\
 	})
 
 extern void *rcu_xchg_pointer_sym(void **p, void *v);
 #define rcu_xchg_pointer(p, v)					\
 	({							\
-		typeof(*p) _________pv = (v);			\
-		typeof(*p) _________p1 =			\
-			rcu_xchg_pointer_sym((void **)(p), _________pv); \
+		typeof(*(p)) _________pv = (v);			\
+		typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+			rcu_xchg_pointer_sym(URCU_FORCE_CAST(void **, p), \
+					     _________pv));	\
 		(_________p1);					\
 	})
 
 extern void *rcu_set_pointer_sym(void **p, void *v);
 #define rcu_set_pointer(p, v)					\
 	({							\
-		typeof(*p) _________pv = (v);			\
-		typeof(*p) _________p1 =			\
-			rcu_set_pointer_sym((void **)(p), _________pv); \
+		typeof(*(p)) _________pv = (v);			\
+		typeof(*(p)) _________p1 = URCU_FORCE_CAST(typeof(*(p)), \
+			rcu_set_pointer_sym(URCU_FORCE_CAST(void **, p), \
+					    _________pv));	\
 	})
 
 #endif /* !_LGPL_SOURCE */
diff --git a/urcu/arch/ppc.h b/urcu/arch/ppc.h
index a03d688..048b217 100644
--- a/urcu/arch/ppc.h
+++ b/urcu/arch/ppc.h
@@ -32,7 +32,24 @@ extern "C" {
 /* Include size of POWER5+ L3 cache lines: 256 bytes */
 #define CAA_CACHE_LINE_SIZE	256
 
-#define cmm_mb()    asm volatile("sync":::"memory")
+/*
+ * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
+ * preserve ordering of cacheable vs. non-cacheable accesses, so it
+ * should not be used to order with respect to MMIO operations. An
+ * eieio+lwsync pair is also not enough for cmm_rmb, because it will
+ * order cacheable and non-cacheable memory operations separately---i.e.
+ * not the latter against the former.
+ */
+#define cmm_mb()         asm volatile("sync":::"memory")
+
+/*
+ * lwsync orders loads in cacheable memory with respect to other loads,
+ * and stores in cacheable memory with respect to other stores.
+ * Therefore, use it for barriers ordering accesses to cacheable memory
+ * only.
+ */
+#define cmm_smp_rmb()    asm volatile("lwsync":::"memory")
+#define cmm_smp_wmb()    asm volatile("lwsync":::"memory")
 
 #define mftbl()						\
 	({						\
diff --git a/urcu/compiler.h b/urcu/compiler.h
index 4bced2a..6db803e 100644
--- a/urcu/compiler.h
+++ b/urcu/compiler.h
@@ -79,4 +79,10 @@
  */
 #define __rcu
 
+#ifdef __cplusplus
+#define URCU_FORCE_CAST(type, arg)	(reinterpret_cast<type>(arg))
+#else
+#define URCU_FORCE_CAST(type, arg)	((type) (arg))
+#endif
+
 #endif /* _URCU_COMPILER_H */
diff --git a/urcu/uatomic/ppc.h b/urcu/uatomic/ppc.h
index 3eb3d63..16dbd0c 100644
--- a/urcu/uatomic/ppc.h
+++ b/urcu/uatomic/ppc.h
@@ -36,10 +36,26 @@ extern "C" {
 #define ILLEGAL_INSTR	".long	0xd00d00"
 
 /*
- * Using a isync as second barrier for exchange to provide acquire semantic.
- * According to uatomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly
- * explicit that this also has acquire semantics."
- * Derived from AO_compare_and_swap(), but removed the comparison.
+ * Providing sequential consistency semantic with respect to other
+ * instructions for cmpxchg and add_return family of atomic primitives.
+ *
+ * This is achieved with:
+ *   lwsync (prior loads can be reordered after following load)
+ *   lwarx
+ *   stwcx.
+ *   test if success (retry)
+ *   sync
+ *
+ * Explanation of the sequential consistency provided by this scheme
+ * from Paul E. McKenney:
+ *
+ * The reason we can get away with the lwsync before is that if a prior
+ * store reorders with the lwarx, then you have to store to the atomic
+ * variable from some other CPU to detect it.
+ *
+ * And if you do that, the lwarx will lose its reservation, so the stwcx
+ * will fail. The atomic operation will retry, so that the caller won't be
+ * able to see the misordering.
  */
 
 /* xchg */
@@ -57,7 +73,7 @@ unsigned long _uatomic_exchange(void *addr, unsigned long val, int len)
 		"1:\t"	"lwarx %0,0,%1\n"	/* load and reserve */
 			"stwcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
@@ -74,7 +90,7 @@ unsigned long _uatomic_exchange(void *addr, unsigned long val, int len)
 		"1:\t"	"ldarx %0,0,%1\n"	/* load and reserve */
 			"stdcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
@@ -110,7 +126,7 @@ unsigned long _uatomic_cmpxchg(void *addr, unsigned long old,
 			"bne 2f\n"		/* old, fail */
 			"stwcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 		"2:\n"
 			: "=&r"(old_val)
 			: "r"(addr), "r"((unsigned int)_new),
@@ -131,7 +147,7 @@ unsigned long _uatomic_cmpxchg(void *addr, unsigned long old,
 			"bne 2f\n"		/* old, fail */
 			"stdcx. %2,0,%1\n"	/* else store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 		"2:\n"
 			: "=&r"(old_val)
 			: "r"(addr), "r"((unsigned long)_new),
@@ -171,7 +187,7 @@ unsigned long _uatomic_add_return(void *addr, unsigned long val,
 			"add %0,%2,%0\n"	/* add val to value loaded */
 			"stwcx. %0,0,%1\n"	/* store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
@@ -189,7 +205,7 @@ unsigned long _uatomic_add_return(void *addr, unsigned long val,
 			"add %0,%2,%0\n"	/* add val to value loaded */
 			"stdcx. %0,0,%1\n"	/* store conditional */
 			"bne- 1b\n"		/* retry if lost reservation */
-			"isync\n"
+			"sync\n"
 			: "=&r"(result)
 			: "r"(addr), "r"(val)
 			: "memory", "cc");
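
For context on the call_rcu changes above, here is a minimal usage sketch, not part of the commit. It assumes the default urcu flavour, linking with -lurcu, and that <urcu.h> pulls in the call_rcu() declarations (include urcu-call-rcu.h explicitly otherwise); the struct foo / free_foo names are illustrative. It shows where create_all_cpu_call_rcu_data() and the new -EEXIST handling in set_cpu_call_rcu_data() become visible to an application.

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>		/* default flavour: call_rcu(), rcu_register_thread() */

struct foo {
	int value;
	struct rcu_head rcu;	/* embedded head handed to call_rcu() */
};

static void free_foo(struct rcu_head *head)
{
	/* Runs from a call_rcu worker thread after a grace period. */
	free(caa_container_of(head, struct foo, rcu));
}

int main(void)
{
	struct foo *p;

	rcu_register_thread();

	/*
	 * Optional: one call_rcu worker per CPU. With the fix above,
	 * losing the creation race inside create_all_cpu_call_rcu_data()
	 * frees the extra call_rcu_data and continues (-EEXIST) instead
	 * of leaking it and returning an error.
	 */
	if (create_all_cpu_call_rcu_data(0))
		fprintf(stderr, "per-CPU call_rcu workers unavailable, "
				"using the default worker\n");

	p = malloc(sizeof(*p));
	if (!p)
		abort();
	p->value = 42;

	/* Defer reclamation of p until pre-existing readers are done. */
	call_rcu(&p->rcu, free_foo);

	/* In a long-running program the worker reclaims p later on. */
	rcu_unregister_thread();
	return 0;
}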
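
A second sketch, also not from the commit, exercises the non-_LGPL_SOURCE pointer macros patched in urcu-pointer.h above; the config structure and function names are hypothetical. It shows the publication/read pattern that rcu_xchg_pointer() and rcu_dereference() (now wrapped in URCU_FORCE_CAST so the header also compiles as C++) are meant to support.

#include <stdlib.h>
#include <urcu.h>
#include <urcu-pointer.h>

struct config {
	int timeout;
};

static struct config *cfg;	/* RCU-protected pointer */

static void update_config(int timeout)
{
	struct config *newp, *oldp;

	newp = malloc(sizeof(*newp));
	if (!newp)
		abort();
	newp->timeout = timeout;

	/* Publish the new version; readers see either the old or the new one. */
	oldp = rcu_xchg_pointer(&cfg, newp);

	synchronize_rcu();	/* wait for readers still using the old version */
	free(oldp);
}

static int read_timeout(void)
{
	struct config *p;
	int ret = -1;

	rcu_read_lock();
	p = rcu_dereference(cfg);	/* pairs with the publication above */
	if (p)
		ret = p->timeout;
	rcu_read_unlock();
	return ret;
}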
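
The lwsync-based cmm_smp_rmb()/cmm_smp_wmb() barriers introduced for ppc above order accesses to cacheable memory only; MMIO ordering still needs the sync-based cmm_mb(), as the comment in the patch explains. The sketch below, with an illustrative payload/ready protocol, shows the message-passing pattern those SMP barriers are meant to cover.

#include <urcu/arch.h>		/* cmm_smp_rmb(), cmm_smp_wmb() */
#include <urcu/system.h>	/* CMM_LOAD_SHARED(), CMM_STORE_SHARED() */

static int payload;
static int ready;

/* Producer: make the payload visible before the flag. */
static void publish(int v)
{
	CMM_STORE_SHARED(payload, v);
	cmm_smp_wmb();		/* order payload store before flag store */
	CMM_STORE_SHARED(ready, 1);
}

/* Consumer: seeing the flag set guarantees seeing the payload. */
static int try_consume(int *v)
{
	if (!CMM_LOAD_SHARED(ready))
		return 0;
	cmm_smp_rmb();		/* order flag load before payload load */
	*v = CMM_LOAD_SHARED(payload);
	return 1;
}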
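
Finally, a small sketch of the uatomic primitives whose ppc implementation now ends in sync rather than isync; the reference-count and ownership helpers are hypothetical and only show call sites that rely on the stronger ordering the patch comment describes.

#include <urcu/uatomic.h>

static unsigned long refcount = 1;

static unsigned long get_ref(void)
{
	/* Returns the incremented value; a full barrier on ppc with this patch. */
	return uatomic_add_return(&refcount, 1);
}

static int put_ref(void)
{
	/* Non-zero when the last reference has been dropped. */
	return uatomic_add_return(&refcount, -1) == 0;
}

static int try_claim(unsigned long *owner, unsigned long self)
{
	/* Atomically claim ownership iff currently unowned (0). */
	return uatomic_cmpxchg(owner, 0, self) == 0;
}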