urcu.git: include/urcu/arch/ppc.h
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_PPC_H
#define _URCU_ARCH_PPC_H

/*
 * arch_ppc.h: trivial definitions for the powerpc architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Most powerpc machines have 128-byte cache lines, but to make sure
 * there is no false sharing on all known Power hardware, use the
 * largest known cache line size, which is the physical size of POWER5
 * L3 cache lines (256 bytes).
 *
 * "Each slice [of the L3] is 12-way set-associative, with 4,096
 * congruence classes of 256-byte lines managed as two 128-byte sectors
 * to match the L2 line size."
 *
 * From: "POWER5 system microarchitecture",
 *       IBM Journal of Research & Development,
 *       vol. 49, no. 4/5, July/September 2005
 *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
 *
 * This value is a compile-time constant, which prevents us from
 * querying the processor for the cache line size at runtime. We
 * therefore need to be pessimistic and assume the largest known cache
 * line size.
 *
 * This value is exposed through public headers, so tuning it for
 * specific environments is a concern for ABI compatibility between
 * applications and liburcu.
 */
#define CAA_CACHE_LINE_SIZE	256
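
/*
 * Illustrative sketch (not part of this header): CAA_CACHE_LINE_SIZE is
 * typically used to align or pad data written by different threads so
 * that independent hot variables do not share a cache line. The
 * per_thread_counter and NR_THREADS names below are hypothetical.
 *
 *	struct per_thread_counter {
 *		unsigned long count;
 *	} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 *
 *	static struct per_thread_counter counters[NR_THREADS];
 *
 * The aligned attribute pads each element to a 256-byte boundary, so an
 * update by one thread does not invalidate the cache line holding
 * another thread's counter.
 */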

#ifdef __NO_LWSYNC__
#define LWSYNC_OPCODE "sync\n"
#else
#define LWSYNC_OPCODE "lwsync\n"
#endif

/*
 * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
 * preserve ordering of cacheable vs. non-cacheable accesses, so it
 * should not be used to order with respect to MMIO operations. An
 * eieio+lwsync pair is also not enough for cmm_rmb, because it will
 * order cacheable and non-cacheable memory operations separately---i.e.
 * not the latter against the former.
 */
#define cmm_mb() __asm__ __volatile__ ("sync":::"memory")
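
/*
 * Illustrative sketch (not part of this header): cmm_mb() emits a full
 * "sync", which also orders cacheable accesses against non-cacheable
 * (e.g. MMIO) accesses. The descriptor and doorbell names below are
 * hypothetical.
 *
 *	desc->addr = buf_phys;		// stores to cacheable memory
 *	desc->len = buf_len;
 *	cmm_mb();			// order the stores above before the doorbell
 *	*(volatile uint32_t *) doorbell = 1;	// non-cacheable MMIO write
 *
 * cmm_smp_wmb() below would not be enough here: as explained above,
 * lwsync does not order cacheable stores against non-cacheable ones.
 */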

/*
 * lwsync orders loads in cacheable memory with respect to other loads,
 * and stores in cacheable memory with respect to other stores.
 * Therefore, use it for barriers ordering accesses to cacheable memory
 * only.
 */
#define cmm_smp_rmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
#define cmm_smp_wmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")

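/*
 * Illustrative sketch (not part of this header): pairing cmm_smp_wmb()
 * on the producer side with cmm_smp_rmb() on the consumer side orders
 * accesses to cacheable memory. The data and ready variables are
 * hypothetical shared variables; real liburcu code would typically wrap
 * the shared accesses in CMM_STORE_SHARED()/CMM_LOAD_SHARED().
 *
 * Producer:
 *	data = 42;
 *	cmm_smp_wmb();		// order the data store before the flag store
 *	ready = 1;
 *
 * Consumer:
 *	if (ready) {
 *		cmm_smp_rmb();	// order the flag load before the data load
 *		assert(data == 42);
 *	}
 */
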
#define mftbl()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define mftbu()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbu %0" : "=r" (rval));	\
		rval;					\
	})

#define mftb()						\
	__extension__					\
	({						\
		unsigned long long rval;		\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define HAS_CAA_GET_CYCLES

typedef uint64_t caa_cycles_t;

#ifdef __powerpc64__
static inline caa_cycles_t caa_get_cycles(void)
{
	return (caa_cycles_t) mftb();
}
#else
static inline caa_cycles_t caa_get_cycles(void)
{
	unsigned long h, l;

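	/*
	 * The 64-bit time base cannot be read atomically on 32-bit
	 * PowerPC: read the upper half (mftbu), then the lower half
	 * (mftbl), then re-read the upper half. If the upper half
	 * changed, the lower half wrapped between the reads, so retry.
	 */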
	for (;;) {
		h = mftbu();
		cmm_barrier();
		l = mftbl();
		cmm_barrier();
		if (mftbu() == h)
			return (((caa_cycles_t) h) << 32) + l;
	}
}
#endif
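
/*
 * Illustrative sketch (not part of this header): caa_cycles_t values can
 * be subtracted to measure an interval in time base ticks (assuming
 * <stdio.h> and <inttypes.h>; work() is hypothetical).
 *
 *	caa_cycles_t start, end;
 *
 *	start = caa_get_cycles();
 *	work();
 *	end = caa_get_cycles();
 *	printf("elapsed: %" PRIu64 " time base ticks\n", end - start);
 *
 * The time base typically runs at a fixed frequency lower than the CPU
 * clock, so converting ticks to seconds requires the platform's time
 * base frequency.
 */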

/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#define __NR_membarrier 365
#endif
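
/*
 * Illustrative sketch (not part of this header): with the number defined
 * above, membarrier(2) can be invoked directly through syscall(2), for
 * example to probe kernel support (assuming <unistd.h>, <sys/syscall.h>,
 * <errno.h> and, for the command constants, <linux/membarrier.h>):
 *
 *	int ret = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
 *
 *	if (ret < 0 && errno == ENOSYS) {
 *		// kernel without membarrier: fall back to other barriers
 *	}
 *
 * MEMBARRIER_CMD_QUERY returns a bitmask of supported commands, or fails
 * with ENOSYS on kernels that do not implement the system call.
 */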

#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_PPC_H */