urcu.git: include/urcu/arch/ppc.h
// SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LGPL-2.1-or-later

#ifndef _URCU_ARCH_PPC_H
#define _URCU_ARCH_PPC_H

/*
 * arch_ppc.h: trivial definitions for the powerpc architecture.
 */

#include <urcu/compiler.h>
#include <urcu/config.h>
#include <urcu/syscall-compat.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Most powerpc machines have 128-byte cache lines, but to make sure
 * there is no false sharing on all known Power hardware, use the
 * largest known cache line size, which is the physical size of POWER5
 * L3 cache lines (256 bytes).
 *
 * "Each slice [of the L3] is 12-way set-associative, with 4,096
 * congruence classes of 256-byte lines managed as two 128-byte sectors
 * to match the L2 line size."
 *
 * From: "POWER5 system microarchitecture",
 *       IBM Journal of Research & Development,
 *       vol. 49, no. 4/5, July/September 2005
 *       https://www.eecg.utoronto.ca/~moshovos/ACA08/readings/power5.pdf
 *
 * This value is a compile-time constant, which prevents us from
 * querying the processor for the cache line size at runtime. We
 * therefore need to be pessimistic and assume the largest known cache
 * line size.
 *
 * This value is exposed through public headers, so tuning it for
 * specific environments is a concern for ABI compatibility between
 * applications and liburcu.
 */
#define CAA_CACHE_LINE_SIZE	256
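
/*
 * Illustrative sketch (not part of this header): CAA_CACHE_LINE_SIZE is
 * typically used to align or pad data written by different threads so
 * that independent hot variables do not share a cache line. The
 * per_thread_counter and NR_THREADS names below are hypothetical.
 *
 *	struct per_thread_counter {
 *		unsigned long count;
 *	} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
 *
 *	static struct per_thread_counter counters[NR_THREADS];
 *
 * The aligned attribute pads each element to a 256-byte boundary, so an
 * update by one thread does not invalidate the cache line holding
 * another thread's counter.
 */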

#ifdef __NO_LWSYNC__
#define LWSYNC_OPCODE "sync\n"
#else
#define LWSYNC_OPCODE "lwsync\n"
#endif

/*
 * Use sync for all cmm_mb/rmb/wmb barriers because lwsync does not
 * preserve ordering of cacheable vs. non-cacheable accesses, so it
 * should not be used to order with respect to MMIO operations. An
 * eieio+lwsync pair is also not enough for cmm_rmb, because it will
 * order cacheable and non-cacheable memory operations separately---i.e.
 * not the latter against the former.
 */
#define cmm_mb() __asm__ __volatile__ ("sync":::"memory")
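
/*
 * Illustrative sketch (not part of this header): cmm_mb() emits a full
 * "sync", which also orders cacheable accesses against non-cacheable
 * (e.g. MMIO) accesses. The descriptor and doorbell names below are
 * hypothetical.
 *
 *	desc->addr = buf_phys;		// stores to cacheable memory
 *	desc->len = buf_len;
 *	cmm_mb();			// order the stores above before the doorbell
 *	*(volatile uint32_t *) doorbell = 1;	// non-cacheable MMIO write
 *
 * cmm_smp_wmb() below would not be enough here: as explained above,
 * lwsync does not order cacheable stores against non-cacheable ones.
 */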

/*
 * lwsync orders loads in cacheable memory with respect to other loads,
 * and stores in cacheable memory with respect to other stores.
 * Therefore, use it for barriers ordering accesses to cacheable memory
 * only.
 */
#define cmm_smp_rmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")
#define cmm_smp_wmb() __asm__ __volatile__ (LWSYNC_OPCODE:::"memory")

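/*
 * Illustrative sketch (not part of this header): pairing cmm_smp_wmb()
 * on the producer side with cmm_smp_rmb() on the consumer side orders
 * accesses to cacheable memory. The data and ready variables are
 * hypothetical shared variables; real liburcu code would typically wrap
 * the shared accesses in CMM_STORE_SHARED()/CMM_LOAD_SHARED().
 *
 * Producer:
 *	data = 42;
 *	cmm_smp_wmb();		// order the data store before the flag store
 *	ready = 1;
 *
 * Consumer:
 *	if (ready) {
 *		cmm_smp_rmb();	// order the flag load before the data load
 *		assert(data == 42);
 *	}
 */
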
#define mftbl()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define mftbu()						\
	__extension__					\
	({						\
		unsigned long rval;			\
		__asm__ __volatile__ ("mftbu %0" : "=r" (rval));	\
		rval;					\
	})

#define mftb()						\
	__extension__					\
	({						\
		unsigned long long rval;		\
		__asm__ __volatile__ ("mftb %0" : "=r" (rval));	\
		rval;					\
	})

#define HAS_CAA_GET_CYCLES

typedef uint64_t caa_cycles_t;

#ifdef __powerpc64__
static inline caa_cycles_t caa_get_cycles(void)
{
	return (caa_cycles_t) mftb();
}
#else
static inline caa_cycles_t caa_get_cycles(void)
{
	unsigned long h, l;

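	/*
	 * The 64-bit time base cannot be read atomically on 32-bit
	 * PowerPC: read the upper half (mftbu), then the lower half
	 * (mftbl), then re-read the upper half. If the upper half
	 * changed, the lower half wrapped between the reads, so retry.
	 */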
	for (;;) {
		h = mftbu();
		cmm_barrier();
		l = mftbl();
		cmm_barrier();
		if (mftbu() == h)
			return (((caa_cycles_t) h) << 32) + l;
	}
}
#endif
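
/*
 * Illustrative sketch (not part of this header): caa_cycles_t values can
 * be subtracted to measure an interval in time base ticks (assuming
 * <stdio.h> and <inttypes.h>; work() is hypothetical).
 *
 *	caa_cycles_t start, end;
 *
 *	start = caa_get_cycles();
 *	work();
 *	end = caa_get_cycles();
 *	printf("elapsed: %" PRIu64 " time base ticks\n", end - start);
 *
 * The time base typically runs at a fixed frequency lower than the CPU
 * clock, so converting ticks to seconds requires the platform's time
 * base frequency.
 */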

/*
 * On Linux, define the membarrier system call number if not yet available in
 * the system headers.
 */
#if (defined(__linux__) && !defined(__NR_membarrier))
#define __NR_membarrier 365
#endif
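
/*
 * Illustrative sketch (not part of this header): with the number defined
 * above, membarrier(2) can be invoked directly through syscall(2), for
 * example to probe kernel support (assuming <unistd.h>, <sys/syscall.h>,
 * <errno.h> and, for the command constants, <linux/membarrier.h>):
 *
 *	int ret = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
 *
 *	if (ret < 0 && errno == ENOSYS) {
 *		// kernel without membarrier: fall back to other barriers
 *	}
 *
 * MEMBARRIER_CMD_QUERY returns a bitmask of supported commands, or fails
 * with ENOSYS on kernels that do not implement the system call.
 */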

#ifdef __cplusplus
}
#endif

#include <urcu/arch/generic.h>

#endif /* _URCU_ARCH_PPC_H */