ppc: Document cache line size choice
[urcu.git] / include / urcu / arch / ppc.h
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_PPC_H
#define _URCU_ARCH_PPC_H

/*
 * arch_ppc.h: trivial definitions for the powerpc architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Most powerpc machines have 128-byte cache lines, but to make sure
 * there is no false sharing on all known Power hardware, use the
 * largest known cache line size, which is the physical size of POWER5
 * L3 cache lines (256 bytes).
 *
 * "Each slice [of the L3] is 12-way set-associative, with 4,096
 * congruence classes of 256-byte lines managed as two 128-byte sectors
 * to match the L2 line size."
 *
 * From: "POWER5 system microarchitecture",
 *       IBM Journal of Research & Development,
 *       vol. 49, no. 4/5, July/September 2005
 *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
 *
 * This value is a compile-time constant, which prevents us from
 * querying the processor for the cache line size at runtime. We
 * therefore need to be pessimistic and assume the largest known cache
 * line size.
 *
 * This value is exposed through public headers, so tuning it for
 * specific environments is a concern for ABI compatibility between
 * applications and liburcu.
 */
#define CAA_CACHE_LINE_SIZE	256
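
/*
 * Illustrative sketch, not part of this header: CAA_CACHE_LINE_SIZE is
 * meant for aligning or padding data updated by different threads, so
 * that independently written fields never share a cache line. The
 * structure below is hypothetical:
 *
 *	struct example_per_thread_counter {
 *		unsigned long count;
 *	} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 */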

#ifdef __NO_LWSYNC__
#define LWSYNC_OPCODE	"sync\n"
#else
#define LWSYNC_OPCODE	"lwsync\n"
#endif

/*
 * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
 * preserve ordering of cacheable vs. non-cacheable accesses, so it
 * should not be used to order with respect to MMIO operations. An
 * eieio+lwsync pair is also not enough for cmm_rmb, because it orders
 * cacheable and non-cacheable memory operations separately, i.e. it
 * does not order the latter against the former.
 */
#define cmm_mb()	__asm__ __volatile__ ("sync":::"memory")
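
/*
 * Illustrative sketch, not part of this header: as a full barrier,
 * cmm_mb() is the only primitive defined here that orders a prior store
 * against a later load, as needed by a Dekker-style flag handshake. The
 * flag variables and handler are hypothetical; CMM_STORE_SHARED() and
 * CMM_LOAD_SHARED() are the accessors from <urcu/system.h>:
 *
 *	CMM_STORE_SHARED(my_flag, 1);
 *	cmm_mb();
 *	if (CMM_LOAD_SHARED(other_flag))
 *		handle_conflict();
 */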

/*
 * lwsync orders loads in cacheable memory with respect to other loads,
 * and stores in cacheable memory with respect to other stores.
 * Therefore, use it for barriers ordering accesses to cacheable memory
 * only.
 */
#define cmm_smp_rmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
#define cmm_smp_wmb()	__asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
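
/*
 * Illustrative sketch, not part of this header: the lwsync-based
 * barriers are sufficient for the usual publish/consume pattern on
 * ordinary cacheable memory. Variable and function names are
 * hypothetical; CMM_STORE_SHARED()/CMM_LOAD_SHARED() come from
 * <urcu/system.h>:
 *
 *	producer:
 *		data = compute();
 *		cmm_smp_wmb();
 *		CMM_STORE_SHARED(ready, 1);
 *
 *	consumer:
 *		if (CMM_LOAD_SHARED(ready)) {
 *			cmm_smp_rmb();
 *			use(data);
 *		}
 */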

#define mftbl()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define mftbu()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbu %0" : "=r" (rval));	\
		rval;					\
	})

#define mftb()						\
	__extension__					\
	({						\
		unsigned long long rval;		\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define HAS_CAA_GET_CYCLES

typedef uint64_t caa_cycles_t;

#ifdef __powerpc64__
static inline caa_cycles_t caa_get_cycles(void)
{
	return (caa_cycles_t) mftb();
}
#else
static inline caa_cycles_t caa_get_cycles(void)
{
	unsigned long h, l;

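	/*
	 * On 32-bit powerpc the 64-bit timebase must be read as two
	 * 32-bit halves (mftbu/mftbl). Retry if the upper half changed
	 * between the two mftbu() reads, which means the lower half
	 * wrapped around in between.
	 */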
	for (;;) {
		h = mftbu();
		cmm_barrier();
		l = mftbl();
		cmm_barrier();
		if (mftbu() == h)
			return (((caa_cycles_t) h) << 32) + l;
	}
}
#endif
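
/*
 * Illustrative sketch, not part of this header: caa_get_cycles() reads
 * the timebase, which ticks at a constant platform-defined frequency
 * rather than at the CPU clock frequency, so differences measure
 * elapsed time in timebase ticks. The work function is hypothetical,
 * and printf()/PRIu64 need <stdio.h> and <inttypes.h>:
 *
 *	caa_cycles_t start, end;
 *
 *	start = caa_get_cycles();
 *	do_work();
 *	end = caa_get_cycles();
 *	printf("elapsed timebase ticks: %" PRIu64 "\n",
 *		(uint64_t) (end - start));
 */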

/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#define __NR_membarrier		365
#endif
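
/*
 * Illustrative sketch, not part of this header: with the definition
 * above, availability of membarrier() can be probed through syscall(2)
 * using MEMBARRIER_CMD_QUERY (command 0), which returns a bitmask of
 * supported commands, or -1 with errno set to ENOSYS when the system
 * call is unavailable. The fallback function is hypothetical:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	long mask = syscall(__NR_membarrier, 0, 0);
 *
 *	if (mask < 0)
 *		fall_back_to_another_flavor();
 */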

#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_PPC_H */