uatomic/x86: Remove redundant memory barriers
include/urcu/arch/x86.h
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_X86_H
#define _URCU_ARCH_X86_H

/*
 * arch_x86.h: trivial definitions for the x86 architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define CAA_CACHE_LINE_SIZE	128

/*
 * For now, use "lock; addl" compatibility mode even for i686: the
 * Pentium III identifies itself as an i686 but lacks the mfence
 * instruction. Fence instructions are only used on x86_64.
 *
 * k1om (__MIC__) is the name for the Intel MIC family (Xeon Phi). It is an
 * x86_64 variant but lacks fence instructions.
 */
#if (defined(URCU_ARCH_AMD64) && !defined(URCU_ARCH_K1OM))

/* For backwards compat */
#define CONFIG_RCU_HAVE_FENCE 1

#define cmm_mb()	__asm__ __volatile__ ("mfence":::"memory")

/*
 * Define cmm_rmb/cmm_wmb to "strict" barriers that may be needed when
 * using SSE or working with I/O areas. cmm_smp_rmb/cmm_smp_wmb are
 * only compiler barriers, which is enough for general use.
 */
#define cmm_rmb()	__asm__ __volatile__ ("lfence":::"memory")
#define cmm_wmb()	__asm__ __volatile__ ("sfence":::"memory")
#define cmm_smp_rmb()	cmm_barrier()
#define cmm_smp_wmb()	cmm_barrier()

#else

/*
 * We leave smp_rmb/smp_wmb as full barriers for processors that do not have
 * fence instructions.
 *
 * An empty cmm_smp_rmb() may not be enough on old PentiumPro multiprocessor
 * systems, due to an erratum. The Linux kernel says that "Even distro
 * kernels should think twice before enabling this", but for now let's
 * be conservative and leave the full barrier on 32-bit processors. Also,
 * IDT WinChip supports weak store ordering, and the kernel may enable it
 * under our feet; cmm_smp_wmb() ceases to be a nop for these processors.
 */
#if (CAA_BITS_PER_LONG == 32)
#define cmm_mb()	__asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
#define cmm_rmb()	__asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
#define cmm_wmb()	__asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
#else
#define cmm_mb()	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
#define cmm_rmb()	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
#define cmm_wmb()	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
#endif
#endif
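
/*
 * Usage sketch (illustrative only, not part of this header): the smp
 * barrier flavours above are enough to order plain memory accesses
 * between CPUs, e.g. when publishing data through a flag. "data",
 * "ready", "compute()" and "do_work()" are hypothetical names, and
 * CMM_STORE_SHARED/CMM_LOAD_SHARED are assumed from <urcu/system.h>.
 *
 * Writer:
 *	data = compute();
 *	cmm_smp_wmb();			// order data store before flag store
 *	CMM_STORE_SHARED(ready, 1);
 *
 * Reader:
 *	while (!CMM_LOAD_SHARED(ready))
 *		caa_cpu_relax();
 *	cmm_smp_rmb();			// order flag load before data load
 *	do_work(data);
 *
 * The stricter cmm_rmb()/cmm_wmb() variants are only needed for
 * weakly-ordered accesses such as non-temporal (SSE) stores or
 * memory-mapped I/O, as noted above.
 */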

#define caa_cpu_relax()	__asm__ __volatile__ ("rep; nop" : : : "memory")

#define HAS_CAA_GET_CYCLES

#define rdtscll(val)							\
	do {								\
		unsigned int __a, __d;					\
		__asm__ __volatile__ ("rdtsc" : "=a" (__a), "=d" (__d));\
		(val) = ((unsigned long long)__a)			\
			| (((unsigned long long)__d) << 32);		\
	} while(0)

typedef uint64_t caa_cycles_t;

static inline caa_cycles_t caa_get_cycles(void)
{
	caa_cycles_t ret = 0;

	rdtscll(ret);
	return ret;
}
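
/*
 * Usage sketch (illustrative only): caa_get_cycles() can be used to
 * measure the duration of a code section in TSC cycles. "do_something()"
 * is a hypothetical placeholder; PRIu64 comes from <inttypes.h>.
 *
 *	caa_cycles_t t0, t1;
 *
 *	t0 = caa_get_cycles();
 *	do_something();
 *	t1 = caa_get_cycles();
 *	printf("elapsed: %" PRIu64 " cycles\n", (uint64_t) (t1 - t0));
 *
 * Note that rdtsc is not a serializing instruction, so measurements of
 * very short sections are approximate.
 */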

/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#if (CAA_BITS_PER_LONG == 32)
#define __NR_membarrier		375
#else
#define __NR_membarrier		324
#endif
#endif
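
/*
 * Usage sketch (illustrative only): the number defined above can be used
 * to probe kernel support for membarrier before relying on it, assuming
 * <linux/membarrier.h> provides MEMBARRIER_CMD_QUERY:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/membarrier.h>
 *
 *	// Returns non-zero when the membarrier system call is available;
 *	// the query command returns a bitmask of supported commands, or
 *	// -1 with errno set to ENOSYS on kernels without membarrier.
 *	static int has_membarrier(void)
 *	{
 *		return syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0) >= 0;
 *	}
 */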

#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_X86_H */