extern "C" {
#endif
-#ifdef __NO_LWSYNC__
-#define LWSYNC_OPCODE "sync\n"
-#else
-#define LWSYNC_OPCODE "lwsync\n"
-#endif
-
#define ILLEGAL_INSTR ".long 0xd00d00"
/*
- * Using a isync as second barrier for exchange to provide acquire semantic.
- * According to uatomic_ops/sysdeps/gcc/powerpc.h, the documentation is "fairly
- * explicit that this also has acquire semantics."
- * Derived from AO_compare_and_swap(), but removed the comparison.
+ * Provide sequential consistency semantics with respect to other
+ * instructions for the cmpxchg and add_return families of atomic
+ * primitives.
+ *
+ * This is achieved with:
+ * lwsync (prior stores can be reordered after following loads)
+ * lwarx
+ * stwcx.
+ * test for success (retry on failure)
+ * sync
+ *
+ * Explanation of the sequential consistency provided by this scheme
+ * from Paul E. McKenney:
+ *
+ * The reason we can get away with the lwsync before is that if a prior
+ * store reorders with the lwarx, then you have to store to the atomic
+ * variable from some other CPU to detect it.
+ *
+ * And if you do that, the lwarx will lose its reservation, so the stwcx
+ * will fail. The atomic operation will retry, so that the caller won't be
+ * able to see the misordering.
*/
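For reference, here is a sketch of the 4-byte exchange as it reads once the hunks below are applied. It assumes LWSYNC_OPCODE is still provided by another liburcu header (its local duplicate definition is removed above); the helper name is hypothetical:

/* Sketch only: mirrors the patched 4-byte exchange loop. */
static inline unsigned int _uatomic_exchange_32_sketch(void *addr,
						       unsigned int val)
{
	unsigned int result;

	__asm__ __volatile__(
		LWSYNC_OPCODE		/* assumed "lwsync\n" ("sync\n" on CPUs without lwsync) */
	"1:\t"	"lwarx %0,0,%1\n"	/* load and reserve */
		"stwcx. %2,0,%1\n"	/* store conditional */
		"bne- 1b\n"		/* retry if lost reservation */
		"sync\n"		/* full barrier after success */
			: "=&r"(result)
			: "r"(addr), "r"(val)
			: "memory", "cc");
	return result;
}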
/* xchg */
"1:\t" "lwarx %0,0,%1\n" /* load and reserve */
"stwcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"1:\t" "ldarx %0,0,%1\n" /* load and reserve */
"stdcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
}
#define uatomic_xchg(addr, v) \
- ((__typeof__(*(addr))) _uatomic_exchange((addr), (unsigned long)(v), \
+ ((__typeof__(*(addr))) _uatomic_exchange((addr), \
+ caa_cast_long_keep_sign(v), \
sizeof(*(addr))))
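Note on the cast change: caa_cast_long_keep_sign() widens v to register width while keeping its signedness, instead of forcing everything through (unsigned long). A minimal sketch of what such a macro can look like, assuming a compile-time signedness test (the real definition lives in liburcu's compiler header; the _sketch suffix marks these as illustrative):

/* Illustrative only: nonzero when `type` is a signed integer type. */
#define caa_is_signed_type_sketch(type)	((type) -1 < (type) 0)

/* Pick a sign-preserving widening cast at compile time. */
#define caa_cast_long_keep_sign_sketch(v)		\
	(caa_is_signed_type_sketch(__typeof__(v)) ?	\
		(long) (v) : (unsigned long) (v))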
/* cmpxchg */
"bne 2f\n" /* old, fail */
"stwcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
"2:\n"
: "=&r"(old_val)
: "r"(addr), "r"((unsigned int)_new),
"bne 2f\n" /* old, fail */
"stdcx. %2,0,%1\n" /* else store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
"2:\n"
: "=&r"(old_val)
: "r"(addr), "r"((unsigned long)_new),
}
-#define uatomic_cmpxchg(addr, old, _new) \
- ((__typeof__(*(addr))) _uatomic_cmpxchg((addr), (unsigned long)(old),\
- (unsigned long)(_new), \
+#define uatomic_cmpxchg(addr, old, _new) \
+ ((__typeof__(*(addr))) _uatomic_cmpxchg((addr), \
+ caa_cast_long_keep_sign(old), \
+ caa_cast_long_keep_sign(_new),\
sizeof(*(addr))))
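As a usage sketch (function and variable names hypothetical), the classic compare-and-swap retry loop built on this primitive; with the trailing sync in place, a successful uatomic_cmpxchg behaves as a full memory barrier:

/* Hypothetical example: atomically set the lowest bit of *flag. */
static int set_lowest_bit(int *flag)
{
	int old, ret;

	do {
		old = uatomic_read(flag);	/* snapshot current value */
		ret = uatomic_cmpxchg(flag, old, old | 1);
	} while (ret != old);	/* retry if another CPU changed *flag */
	return old;
}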
/* uatomic_add_return */
"add %0,%2,%0\n" /* add val to value loaded */
"stwcx. %0,0,%1\n" /* store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
"add %0,%2,%0\n" /* add val to value loaded */
"stdcx. %0,0,%1\n" /* store conditional */
"bne- 1b\n" /* retry if lost reservation */
- "isync\n"
+ "sync\n"
: "=&r"(result)
: "r"(addr), "r"(val)
: "memory", "cc");
}
-#define uatomic_add_return(addr, v) \
- ((__typeof__(*(addr))) _uatomic_add_return((addr), \
- (unsigned long)(v), \
- sizeof(*(addr))))
+#define uatomic_add_return(addr, v) \
+ ((__typeof__(*(addr))) _uatomic_add_return((addr), \
+ caa_cast_long_keep_sign(v), \
+ sizeof(*(addr))))
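And a usage sketch for uatomic_add_return (names hypothetical). A negative addend such as -1 is the sort of case the sign-preserving cast above is meant to handle, and the trailing sync makes the whole operation a full barrier:

/* Hypothetical example: drop a reference; nonzero when it was the last one. */
static int refcount_dec_and_test(long *refs)
{
	return uatomic_add_return(refs, -1) == 0;
}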
#ifdef __cplusplus
}