liblttng-ust/perf_event.h

   1 /*
   2  * Performance events:
   3  *
   4  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
   5  *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
   6  *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
   7  *
   8  * Data type definitions, declarations, prototypes.
   9  *
  10  *    Started by: Thomas Gleixner and Ingo Molnar
  11  *
  12  * Header copied from Linux kernel v4.7 installed headers.
  13  *
  14  * This program is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU General Public License as published by
  16  * the Free Software Foundation; either version 2 of the License.
  17  *
  18  * This program is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU General Public License
  24  * along with this program; if not, you can access it online at
  25  * http://www.gnu.org/licenses/gpl-2.0.html.
  26  */
  27 #ifndef _UAPI_LINUX_PERF_EVENT_H
  28 #define _UAPI_LINUX_PERF_EVENT_H
  29
  30 #include <linux/types.h>
  31 #include <linux/ioctl.h>
  32 #include <asm/byteorder.h>
  33
  34 /*
  35  * User-space ABI bits:
  36  */
  37
  /*
   * attr.type
   *
   * Major event class, stored in perf_event_attr.type.
   */
  enum perf_type_id {
          PERF_TYPE_HARDWARE                      = 0,
          PERF_TYPE_SOFTWARE                      = 1,
          PERF_TYPE_TRACEPOINT                    = 2,
          PERF_TYPE_HW_CACHE                      = 3,
          PERF_TYPE_RAW                           = 4,
          PERF_TYPE_BREAKPOINT                    = 5,

          PERF_TYPE_MAX,                          /* non-ABI */
  };
  51
  /*
   * Generalized performance event event_id types, used by the
   * attr.event_id parameter of the sys_perf_event_open()
   * syscall:
   *
   * NOTE(review): struct perf_event_attr as declared in this header has no
   * event_id member; these values presumably go in attr.config - confirm
   * against perf_event_open(2).
   */
  enum perf_hw_id {
          /*
           * Common hardware events, generalized by the kernel:
           */
          PERF_COUNT_HW_CPU_CYCLES                = 0,
          PERF_COUNT_HW_INSTRUCTIONS              = 1,
          PERF_COUNT_HW_CACHE_REFERENCES          = 2,
          PERF_COUNT_HW_CACHE_MISSES              = 3,
          PERF_COUNT_HW_BRANCH_INSTRUCTIONS       = 4,
          PERF_COUNT_HW_BRANCH_MISSES             = 5,
          PERF_COUNT_HW_BUS_CYCLES                = 6,
          PERF_COUNT_HW_STALLED_CYCLES_FRONTEND   = 7,
          PERF_COUNT_HW_STALLED_CYCLES_BACKEND    = 8,
          PERF_COUNT_HW_REF_CPU_CYCLES            = 9,

          PERF_COUNT_HW_MAX,                      /* non-ABI */
  };
  74
  /*
   * Generalized hardware cache events:
   *
   *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
   *       { read, write, prefetch } x
   *       { accesses, misses }
   *
   * This enum is the first factor: which cache (or cache-like unit) is
   * being measured.
   */
  enum perf_hw_cache_id {
          PERF_COUNT_HW_CACHE_L1D                 = 0,
          PERF_COUNT_HW_CACHE_L1I                 = 1,
          PERF_COUNT_HW_CACHE_LL                  = 2,
          PERF_COUNT_HW_CACHE_DTLB                = 3,
          PERF_COUNT_HW_CACHE_ITLB                = 4,
          PERF_COUNT_HW_CACHE_BPU                 = 5,
          PERF_COUNT_HW_CACHE_NODE                = 6,

          PERF_COUNT_HW_CACHE_MAX,                /* non-ABI */
  };
  93
  /*
   * Second factor of a generalized hardware cache event: the kind of
   * operation (read, write or prefetch).
   */
  enum perf_hw_cache_op_id {
          PERF_COUNT_HW_CACHE_OP_READ             = 0,
          PERF_COUNT_HW_CACHE_OP_WRITE            = 1,
          PERF_COUNT_HW_CACHE_OP_PREFETCH         = 2,

          PERF_COUNT_HW_CACHE_OP_MAX,             /* non-ABI */
  };
 101
 /*
  * Third factor of a generalized hardware cache event: whether accesses
  * or misses are counted.
  */
 enum perf_hw_cache_op_result_id {
         PERF_COUNT_HW_CACHE_RESULT_ACCESS       = 0,
         PERF_COUNT_HW_CACHE_RESULT_MISS         = 1,

         PERF_COUNT_HW_CACHE_RESULT_MAX,         /* non-ABI */
 };
 108
 /*
  * Special "software" events provided by the kernel, even if the hardware
  * does not support performance events. These events measure various
  * physical and sw events of the kernel (and allow the profiling of them as
  * well):
  */
 enum perf_sw_ids {
         PERF_COUNT_SW_CPU_CLOCK                 = 0,
         PERF_COUNT_SW_TASK_CLOCK                = 1,
         PERF_COUNT_SW_PAGE_FAULTS               = 2,
         PERF_COUNT_SW_CONTEXT_SWITCHES          = 3,
         PERF_COUNT_SW_CPU_MIGRATIONS            = 4,
         PERF_COUNT_SW_PAGE_FAULTS_MIN           = 5,
         PERF_COUNT_SW_PAGE_FAULTS_MAJ           = 6,
         PERF_COUNT_SW_ALIGNMENT_FAULTS          = 7,
         PERF_COUNT_SW_EMULATION_FAULTS          = 8,
         PERF_COUNT_SW_DUMMY                     = 9,
         PERF_COUNT_SW_BPF_OUTPUT                = 10,

         PERF_COUNT_SW_MAX,                      /* non-ABI */
 };
 130
 /*
  * Bits that can be set in attr.sample_type to request information
  * in the overflow packets.
  */
 enum perf_event_sample_format {
         PERF_SAMPLE_IP                          = 1U << 0,
         PERF_SAMPLE_TID                         = 1U << 1,
         PERF_SAMPLE_TIME                        = 1U << 2,
         PERF_SAMPLE_ADDR                        = 1U << 3,
         PERF_SAMPLE_READ                        = 1U << 4,
         PERF_SAMPLE_CALLCHAIN                   = 1U << 5,
         PERF_SAMPLE_ID                          = 1U << 6,
         PERF_SAMPLE_CPU                         = 1U << 7,
         PERF_SAMPLE_PERIOD                      = 1U << 8,
         PERF_SAMPLE_STREAM_ID                   = 1U << 9,
         PERF_SAMPLE_RAW                         = 1U << 10,
         PERF_SAMPLE_BRANCH_STACK                = 1U << 11,
         PERF_SAMPLE_REGS_USER                   = 1U << 12,
         PERF_SAMPLE_STACK_USER                  = 1U << 13,
         PERF_SAMPLE_WEIGHT                      = 1U << 14,
         PERF_SAMPLE_DATA_SRC                    = 1U << 15,
         PERF_SAMPLE_IDENTIFIER                  = 1U << 16,
         PERF_SAMPLE_TRANSACTION                 = 1U << 17,
         PERF_SAMPLE_REGS_INTR                   = 1U << 18,

         PERF_SAMPLE_MAX = 1U << 19,             /* non-ABI */
 };
 158
 /*
  * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK
  * is set
  *
  * If the user does not pass priv level information via branch_sample_type,
  * the kernel uses the event's priv level. Branch and event priv levels do
  * not have to match. Branch priv level is checked for permissions.
  *
  * The branch types can be combined, however BRANCH_ANY covers all types
  * of branches and therefore it supersedes all the other types.
  *
  * This enum holds the bit positions; the corresponding masks are the
  * PERF_SAMPLE_BRANCH_* values of enum perf_branch_sample_type.
  */
 enum perf_branch_sample_type_shift {
         PERF_SAMPLE_BRANCH_USER_SHIFT           = 0, /* user branches */
         PERF_SAMPLE_BRANCH_KERNEL_SHIFT         = 1, /* kernel branches */
         PERF_SAMPLE_BRANCH_HV_SHIFT             = 2, /* hypervisor branches */

         PERF_SAMPLE_BRANCH_ANY_SHIFT            = 3, /* any branch types */
         PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT       = 4, /* any call branch */
         PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT     = 5, /* any return branch */
         PERF_SAMPLE_BRANCH_IND_CALL_SHIFT       = 6, /* indirect calls */
         PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT       = 7, /* transaction aborts */
         PERF_SAMPLE_BRANCH_IN_TX_SHIFT          = 8, /* in transaction */
         PERF_SAMPLE_BRANCH_NO_TX_SHIFT          = 9, /* not in transaction */
         PERF_SAMPLE_BRANCH_COND_SHIFT           = 10, /* conditional branches */

         PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT     = 11, /* call/ret stack */
         PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT       = 12, /* indirect jumps */
         PERF_SAMPLE_BRANCH_CALL_SHIFT           = 13, /* direct call */

         PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT       = 14, /* no flags */
         PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT      = 15, /* no cycles */

         PERF_SAMPLE_BRANCH_MAX_SHIFT            /* non-ABI */
 };
 192
 193 enum perf_branch_sample_type {
 194         PERF_SAMPLE_BRANCH_USER         = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
 195         PERF_SAMPLE_BRANCH_KERNEL       = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
 196         PERF_SAMPLE_BRANCH_HV           = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
 197
 198         PERF_SAMPLE_BRANCH_ANY          = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
 199         PERF_SAMPLE_BRANCH_ANY_CALL     = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
 200         PERF_SAMPLE_BRANCH_ANY_RETURN   = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
 201         PERF_SAMPLE_BRANCH_IND_CALL     = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
 202         PERF_SAMPLE_BRANCH_ABORT_TX     = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
 203         PERF_SAMPLE_BRANCH_IN_TX        = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
 204         PERF_SAMPLE_BRANCH_NO_TX        = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
 205         PERF_SAMPLE_BRANCH_COND         = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 206
 207         PERF_SAMPLE_BRANCH_CALL_STACK   = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
 208         PERF_SAMPLE_BRANCH_IND_JUMP     = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 209         PERF_SAMPLE_BRANCH_CALL         = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
 210
 211         PERF_SAMPLE_BRANCH_NO_FLAGS     = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
 212         PERF_SAMPLE_BRANCH_NO_CYCLES    = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
 213
 214         PERF_SAMPLE_BRANCH_MAX          = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 215 };
 216
 /* Union of the three privilege-level branch filters (user, kernel, hv). */
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
         (PERF_SAMPLE_BRANCH_USER|\
          PERF_SAMPLE_BRANCH_KERNEL|\
          PERF_SAMPLE_BRANCH_HV)
 221
 /*
  * Values to determine ABI of the registers dump.
  */
 enum perf_sample_regs_abi {
         PERF_SAMPLE_REGS_ABI_NONE       = 0,
         PERF_SAMPLE_REGS_ABI_32         = 1,
         PERF_SAMPLE_REGS_ABI_64         = 2,
 };
 230
 /*
  * Values for the memory transaction event qualifier, mostly for
  * abort events. Multiple bits can be set.
  *
  * NOTE: PERF_TXN_ABORT_MASK does not fit in an int. Before C23, ISO C
  * requires enumerator values to be representable as int, so this
  * enumerator relies on the wider-enum extension of GCC and Clang.
  */
 enum {
         PERF_TXN_ELISION        = (1 << 0), /* From elision */
         PERF_TXN_TRANSACTION    = (1 << 1), /* From transaction */
         PERF_TXN_SYNC           = (1 << 2), /* Instruction is related */
         PERF_TXN_ASYNC          = (1 << 3), /* Instruction not related */
         PERF_TXN_RETRY          = (1 << 4), /* Retry possible */
         PERF_TXN_CONFLICT       = (1 << 5), /* Conflict abort */
         PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
         PERF_TXN_CAPACITY_READ  = (1 << 7), /* Capacity read abort */

         PERF_TXN_MAX            = (1 << 8), /* non-ABI */

         /* bits 32..63 are reserved for the abort code */

         PERF_TXN_ABORT_MASK  = (0xffffffffULL << 32),
         PERF_TXN_ABORT_SHIFT = 32,
 };
 252
 /*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
  *
  * struct read_format {
  *      { u64           value;
  *        { u64         time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
  *        { u64         time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
  *        { u64         id;           } && PERF_FORMAT_ID
  *      } && !PERF_FORMAT_GROUP
  *
  *      { u64           nr;
  *        { u64         time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
  *        { u64         time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
  *        { u64         value;
  *          { u64       id;           } && PERF_FORMAT_ID
  *        }             cntr[nr];
  *      } && PERF_FORMAT_GROUP
  * };
  */
 enum perf_event_read_format {
         PERF_FORMAT_TOTAL_TIME_ENABLED          = 1U << 0,
         PERF_FORMAT_TOTAL_TIME_RUNNING          = 1U << 1,
         PERF_FORMAT_ID                          = 1U << 2,
         PERF_FORMAT_GROUP                       = 1U << 3,

         PERF_FORMAT_MAX = 1U << 4,              /* non-ABI */
 };
 281
 /*
  * Published sizes, in bytes, of struct perf_event_attr; each revision
  * appended the fields named in its comment.
  */
 #define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */
 #define PERF_ATTR_SIZE_VER1     72      /* add: config2 */
 #define PERF_ATTR_SIZE_VER2     80      /* add: branch_sample_type */
 #define PERF_ATTR_SIZE_VER3     96      /* add: sample_regs_user */
                                         /* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4     104     /* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5     112     /* add: aux_watermark */
 289
 /*
  * Hardware event_id to monitor via a performance monitoring event:
  *
  * The layout is ABI. The "ends at byte N" markers below give the offset
  * at which each published PERF_ATTR_SIZE_VERn revision stops.
  */
 struct perf_event_attr {

         /*
          * Major type: hardware/software/tracepoint/etc.
          */
         __u32                   type;

         /*
          * Size of the attr structure, for fwd/bwd compat.
          */
         __u32                   size;

         /*
          * Type specific configuration information.
          */
         __u64                   config;

         union {
                 __u64           sample_period;
                 __u64           sample_freq;
         };

         __u64                   sample_type;
         __u64                   read_format;

         /*
          * 64 one-word flag bits; perf_flags() addresses this word as
          * the __u64 that follows read_format.
          */
         __u64                   disabled       :  1, /* off by default        */
                                 inherit        :  1, /* children inherit it   */
                                 pinned         :  1, /* must always be on PMU */
                                 exclusive      :  1, /* only group on PMU     */
                                 exclude_user   :  1, /* don't count user      */
                                 exclude_kernel :  1, /* ditto kernel          */
                                 exclude_hv     :  1, /* ditto hypervisor      */
                                 exclude_idle   :  1, /* don't count when idle */
                                 mmap           :  1, /* include mmap data     */
                                 comm           :  1, /* include comm data     */
                                 freq           :  1, /* use freq, not period  */
                                 inherit_stat   :  1, /* per task counts       */
                                 enable_on_exec :  1, /* next exec enables     */
                                 task           :  1, /* trace fork/exit       */
                                 watermark      :  1, /* wakeup_watermark      */
                                 /*
                                  * precise_ip:
                                  *
                                  *  0 - SAMPLE_IP can have arbitrary skid
                                  *  1 - SAMPLE_IP must have constant skid
                                  *  2 - SAMPLE_IP requested to have 0 skid
                                  *  3 - SAMPLE_IP must have 0 skid
                                  *
                                  *  See also PERF_RECORD_MISC_EXACT_IP
                                  */
                                 precise_ip     :  2, /* skid constraint       */
                                 mmap_data      :  1, /* non-exec mmap data    */
                                 sample_id_all  :  1, /* sample_type all events */

                                 exclude_host   :  1, /* don't count in host   */
                                 exclude_guest  :  1, /* don't count in guest  */

                                 exclude_callchain_kernel : 1, /* exclude kernel callchains */
                                 exclude_callchain_user   : 1, /* exclude user callchains */
                                 mmap2          :  1, /* include mmap with inode data     */
                                 comm_exec      :  1, /* flag comm events that are due to an exec */
                                 use_clockid    :  1, /* use @clockid for time fields */
                                 context_switch :  1, /* context switch data */
                                 write_backward :  1, /* Write ring buffer from end to beginning */
                                 __reserved_1   : 36;

         union {
                 __u32           wakeup_events;    /* wakeup every n events */
                 __u32           wakeup_watermark; /* bytes before wakeup   */
         };

         __u32                   bp_type;
         union {
                 __u64           bp_addr;
                 __u64           config1; /* extension of config */
         };
         /* PERF_ATTR_SIZE_VER0 ends at byte 64 */
         union {
                 __u64           bp_len;
                 __u64           config2; /* extension of config1 */
         };
         /* PERF_ATTR_SIZE_VER1 ends at byte 72 */
         __u64   branch_sample_type; /* enum perf_branch_sample_type */
         /* PERF_ATTR_SIZE_VER2 ends at byte 80 */

         /*
          * Defines set of user regs to dump on samples.
          * See asm/perf_regs.h for details.
          */
         __u64   sample_regs_user;

         /*
          * Defines size of the user stack to dump on samples.
          */
         __u32   sample_stack_user;

         __s32   clockid;
         /* PERF_ATTR_SIZE_VER3 ends at byte 96 */

         /*
          * Defines set of regs to dump for each sample
          * state captured on:
          *  - precise = 0: PMU interrupt
          *  - precise > 0: sampled instruction
          *
          * See asm/perf_regs.h for details.
          */
         __u64   sample_regs_intr;
         /* PERF_ATTR_SIZE_VER4 ends at byte 104 */

         /*
          * Wakeup watermark for AUX area
          */
         __u32   aux_watermark;
         __u32   __reserved_2;   /* align to __u64 */
         /* PERF_ATTR_SIZE_VER5 ends at byte 112 */
 };
 403
 /*
  * perf_flags(attr): lvalue for the object located immediately after
  * attr->read_format. In struct perf_event_attr that is the __u64 word
  * holding the one-bit flags (disabled, inherit, ...), so the whole flag
  * word can be read or written at once. @attr must be a pointer.
  */
 #define perf_flags(attr)        (*(&(attr)->read_format + 1))
 405
 /*
  * Ioctls that can be done on a perf event fd:
  *
  * All requests use the '$' ioctl type. SET_FILTER and ID encode the size
  * of a pointer type (char *, __u64 *), so their numeric request values
  * differ between 32-bit and 64-bit ABIs; the argument types are part of
  * the ABI and must not be "cleaned up".
  */
 #define PERF_EVENT_IOC_ENABLE           _IO ('$', 0)
 #define PERF_EVENT_IOC_DISABLE          _IO ('$', 1)
 #define PERF_EVENT_IOC_REFRESH          _IO ('$', 2)
 #define PERF_EVENT_IOC_RESET            _IO ('$', 3)
 #define PERF_EVENT_IOC_PERIOD           _IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT       _IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER       _IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID               _IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF          _IOW('$', 8, __u32)
 #define PERF_EVENT_IOC_PAUSE_OUTPUT     _IOW('$', 9, __u32)
 419
 /*
  * Flag bits for perf event ioctls.
  * NOTE(review): presumably passed as the ioctl argument to apply the
  * request to a whole event group - confirm against perf_event_open(2).
  */
 enum perf_event_ioc_flags {
         PERF_IOC_FLAG_GROUP             = 1U << 0,
 };
 423
 /*
  * Structure of the page that can be mapped via mmap
  *
  * The self-monitoring header occupies the first 1024 bytes (padded by
  * __reserved[]); the ring-buffer control words start at data_head.
  */
 struct perf_event_mmap_page {
         __u32   version;                /* version number of this structure */
         __u32   compat_version;         /* lowest version this is compat with */

         /*
          * Bits needed to read the hw events in user-space.
          *
          *   u32 seq, time_mult, time_shift, index, width;
          *   u64 count, enabled, running;
          *   u64 cyc, time_offset;
          *   s64 pmc = 0;
          *
          *   do {
          *     seq = pc->lock;
          *     barrier();
          *
          *     enabled = pc->time_enabled;
          *     running = pc->time_running;
          *
          *     if (pc->cap_user_time && enabled != running) {
          *       cyc = rdtsc();
          *       time_offset = pc->time_offset;
          *       time_mult   = pc->time_mult;
          *       time_shift  = pc->time_shift;
          *     }
          *
          *     index = pc->index;
          *     count = pc->offset;
          *     if (pc->cap_user_rdpmc && index) {
          *       width = pc->pmc_width;
          *       pmc = rdpmc(index - 1);
          *     }
          *
          *     barrier();
          *   } while (pc->lock != seq);
          *
          * NOTE: for obvious reason this only works on self-monitoring
          *       processes.
          */
         __u32   lock;                   /* seqlock for synchronization */
         __u32   index;                  /* hardware event identifier */
         __s64   offset;                 /* add to hardware event value */
         __u64   time_enabled;           /* time event active */
         __u64   time_running;           /* time event on cpu */
         union {
                 __u64   capabilities;
                 struct {
                         __u64   cap_bit0                : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
                                 cap_bit0_is_deprecated  : 1, /* Always 1, signals that bit 0 is zero */

                                 cap_user_rdpmc          : 1, /* The RDPMC instruction can be used to read counts */
                                 cap_user_time           : 1, /* The time_* fields are used */
                                 cap_user_time_zero      : 1, /* The time_zero field is used */
                                 cap_____res             : 59;
                 };
         };

         /*
          * If cap_user_rdpmc this field provides the bit-width of the value
          * read using the rdpmc() or equivalent instruction. This can be used
          * to sign extend the result like:
          *
          *   pmc <<= 64 - width;
          *   pmc >>= 64 - width; // signed shift right
          *   count += pmc;
          */
         __u16   pmc_width;

         /*
          * If cap_user_time the below fields can be used to compute the time
          * delta since time_enabled (in ns) using rdtsc or similar.
          *
          *   u64 quot, rem;
          *   u64 delta;
          *
          *   quot = (cyc >> time_shift);
          *   rem = cyc & (((u64)1 << time_shift) - 1);
          *   delta = time_offset + quot * time_mult +
          *              ((rem * time_mult) >> time_shift);
          *
          * Where time_offset,time_mult,time_shift and cyc are read in the
          * seqcount loop described above. This delta can then be added to
          * enabled and possibly running (if index), improving the scaling:
          *
          *   enabled += delta;
          *   if (index)
          *     running += delta;
          *
          *   quot = count / running;
          *   rem  = count % running;
          *   count = quot * enabled + (rem * enabled) / running;
          */
         __u16   time_shift;
         __u32   time_mult;
         __u64   time_offset;
         /*
          * If cap_user_time_zero, the hardware clock (e.g. TSC) can be calculated
          * from sample timestamps.
          *
          *   time = timestamp - time_zero;
          *   quot = time / time_mult;
          *   rem  = time % time_mult;
          *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
          *
          * And vice versa:
          *
          *   quot = cyc >> time_shift;
          *   rem  = cyc & (((u64)1 << time_shift) - 1);
          *   timestamp = time_zero + quot * time_mult +
          *               ((rem * time_mult) >> time_shift);
          */
         __u64   time_zero;
         __u32   size;                   /* Header size up to __reserved[] fields. */

         /*
          * Hole for extension of the self monitor capabilities
          */

         __u8    __reserved[118*8+4];    /* align to 1k. */

         /*
          * Control data for the mmap() data buffer.
          *
          * User-space reading the @data_head value should issue an smp_rmb(),
          * after reading this value.
          *
          * When the mapping is PROT_WRITE the @data_tail value should be
          * written by userspace to reflect the last read data, after issuing
          * an smp_mb() to separate the data read from the ->data_tail store.
          * In this case the kernel will not over-write unread data.
          *
          * See perf_output_put_handle() for the data ordering.
          *
          * data_{offset,size} indicate the location and size of the perf record
          * buffer within the mmapped area.
          */
         __u64   data_head;              /* head in the data section */
         __u64   data_tail;              /* user-space written tail */
         __u64   data_offset;            /* where the buffer starts */
         __u64   data_size;              /* data buffer size */

         /*
          * AUX area is defined by aux_{offset,size} fields that should be set
          * by the userspace, so that
          *
          *   aux_offset >= data_offset + data_size
          *
          * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
          *
          * Ring buffer pointers aux_{head,tail} have the same semantics as
          * data_{head,tail} and same ordering rules apply.
          */
         __u64   aux_head;
         __u64   aux_tail;
         __u64   aux_offset;
         __u64   aux_size;
 };
 584
 /*
  * PERF_RECORD_MISC_* bit definitions. The low three bits
  * (PERF_RECORD_MISC_CPUMODE_MASK) hold one CPU-mode value; the higher
  * bits are independent flags.
  */
 #define PERF_RECORD_MISC_CPUMODE_MASK           (7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN        (0 << 0)
 #define PERF_RECORD_MISC_KERNEL                 (1 << 0)
 #define PERF_RECORD_MISC_USER                   (2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR             (3 << 0)
 #define PERF_RECORD_MISC_GUEST_KERNEL           (4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER             (5 << 0)

 /*
  * Indicates that parsing of /proc/PID/maps was truncated by a timeout.
  */
 #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12)
 /*
  * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
  * different events so can reuse the same bit position.
  * Ditto PERF_RECORD_MISC_SWITCH_OUT.
  */
 #define PERF_RECORD_MISC_MMAP_DATA              (1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC              (1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT             (1 << 13)
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also
  * perf_event_attr::precise_ip.
  */
 #define PERF_RECORD_MISC_EXACT_IP               (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
 #define PERF_RECORD_MISC_EXT_RESERVED           (1 << 15)
 615
 /*
  * Fixed 8-byte header that begins every record layout documented under
  * enum perf_event_type.
  * NOTE(review): misc presumably carries the PERF_RECORD_MISC_* bits and
  * size the total record length in bytes - confirm against
  * perf_event_open(2).
  */
 struct perf_event_header {
         __u32   type;
         __u16   misc;
         __u16   size;
 };
 621
 622 enum perf_event_type {
 623
 624         /*
 625          * If perf_event_attr.sample_id_all is set then all event types will
 626          * have the sample_type selected fields related to where/when
 627          * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
 628          * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed
 629          * just after the perf_event_header and the fields already present for
 630          * the existing fields, i.e. at the end of the payload. That way a newer
 631          * perf.data file will be supported by older perf tools, with these new
 632          * optional fields being ignored.
 633          *
 634          * struct sample_id {
 635          *      { u32                   pid, tid; } && PERF_SAMPLE_TID
 636          *      { u64                   time;     } && PERF_SAMPLE_TIME
 637          *      { u64                   id;       } && PERF_SAMPLE_ID
 638          *      { u64                   stream_id;} && PERF_SAMPLE_STREAM_ID
 639          *      { u32                   cpu, res; } && PERF_SAMPLE_CPU
 640          *      { u64                   id;       } && PERF_SAMPLE_IDENTIFIER
 641          * } && perf_event_attr::sample_id_all
 642          *
 643          * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.  The
 644          * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
 645          * relative to header.size.
 646          */
 647
 648         /*
 649          * The MMAP events record the PROT_EXEC mappings so that we can
 650          * correlate userspace IPs to code. They have the following structure:
 651          *
 652          * struct {
 653          *      struct perf_event_header        header;
 654          *
 655          *      u32                             pid, tid;
 656          *      u64                             addr;
 657          *      u64                             len;
 658          *      u64                             pgoff;
 659          *      char                            filename[];
 660          *      struct sample_id                sample_id;
 661          * };
 662          */
 663         PERF_RECORD_MMAP                        = 1,
 664
 665         /*
 666          * struct {
 667          *      struct perf_event_header        header;
 668          *      u64                             id;
 669          *      u64                             lost;
 670          *      struct sample_id                sample_id;
 671          * };
 672          */
 673         PERF_RECORD_LOST                        = 2,
 674
 675         /*
 676          * struct {
 677          *      struct perf_event_header        header;
 678          *
 679          *      u32                             pid, tid;
 680          *      char                            comm[];
 681          *      struct sample_id                sample_id;
 682          * };
 683          */
 684         PERF_RECORD_COMM                        = 3,
 685
 686         /*
 687          * struct {
 688          *      struct perf_event_header        header;
 689          *      u32                             pid, ppid;
 690          *      u32                             tid, ptid;
 691          *      u64                             time;
 692          *      struct sample_id                sample_id;
 693          * };
 694          */
 695         PERF_RECORD_EXIT                        = 4,
 696
 697         /*
 698          * struct {
 699          *      struct perf_event_header        header;
 700          *      u64                             time;
 701          *      u64                             id;
 702          *      u64                             stream_id;
 703          *      struct sample_id                sample_id;
 704          * };
 705          */
 706         PERF_RECORD_THROTTLE                    = 5,
 707         PERF_RECORD_UNTHROTTLE                  = 6,
 708
 709         /*
 710          * struct {
 711          *      struct perf_event_header        header;
 712          *      u32                             pid, ppid;
 713          *      u32                             tid, ptid;
 714          *      u64                             time;
 715          *      struct sample_id                sample_id;
 716          * };
 717          */
 718         PERF_RECORD_FORK                        = 7,
 719
 720         /*
 721          * struct {
 722          *      struct perf_event_header        header;
 723          *      u32                             pid, tid;
 724          *
 725          *      struct read_format              values;
 726          *      struct sample_id                sample_id;
 727          * };
 728          */
 729         PERF_RECORD_READ                        = 8,
 730
 731         /*
 732          * struct {
 733          *      struct perf_event_header        header;
 734          *
 735          *      #
 736          *      # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
 737          *      # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
 738          *      # is fixed relative to header.
 739          *      #
 740          *
 741          *      { u64                   id;       } && PERF_SAMPLE_IDENTIFIER
 742          *      { u64                   ip;       } && PERF_SAMPLE_IP
 743          *      { u32                   pid, tid; } && PERF_SAMPLE_TID
 744          *      { u64                   time;     } && PERF_SAMPLE_TIME
 745          *      { u64                   addr;     } && PERF_SAMPLE_ADDR
 746          *      { u64                   id;       } && PERF_SAMPLE_ID
 747          *      { u64                   stream_id;} && PERF_SAMPLE_STREAM_ID
 748          *      { u32                   cpu, res; } && PERF_SAMPLE_CPU
 749          *      { u64                   period;   } && PERF_SAMPLE_PERIOD
 750          *
 751          *      { struct read_format    values;   } && PERF_SAMPLE_READ
 752          *
 753          *      { u64                   nr,
 754          *        u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 755          *
 756          *      #
 757          *      # The RAW record below is opaque data wrt the ABI
 758          *      #
 759          *      # That is, the ABI doesn't make any promises wrt to
 760          *      # the stability of its content, it may vary depending
 761          *      # on event, hardware, kernel version and phase of
 762          *      # the moon.
 763          *      #
 764          *      # In other words, PERF_SAMPLE_RAW contents are not an ABI.
 765          *      #
 766          *
 767          *      { u32                   size;
 768          *        char                  data[size];}&& PERF_SAMPLE_RAW
 769          *
 770          *      { u64                   nr;
 771          *        { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
 772          *
 773          *      { u64                   abi; # enum perf_sample_regs_abi
 774          *        u64                   regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
 775          *
 776          *      { u64                   size;
 777          *        char                  data[size];
 778          *        u64                   dyn_size; } && PERF_SAMPLE_STACK_USER
 779          *
 780          *      { u64                   weight;   } && PERF_SAMPLE_WEIGHT
 781          *      { u64                   data_src; } && PERF_SAMPLE_DATA_SRC
 782          *      { u64                   transaction; } && PERF_SAMPLE_TRANSACTION
 783          *      { u64                   abi; # enum perf_sample_regs_abi
 784          *        u64                   regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
 785          * };
 786          */
 787         PERF_RECORD_SAMPLE                      = 9,
 788
 789         /*
 790          * The MMAP2 records are an augmented version of MMAP, they add
 791          * maj, min, ino numbers to be used to uniquely identify each mapping
 792          *
 793          * struct {
 794          *      struct perf_event_header        header;
 795          *
 796          *      u32                             pid, tid;
 797          *      u64                             addr;
 798          *      u64                             len;
 799          *      u64                             pgoff;
 800          *      u32                             maj;
 801          *      u32                             min;
 802          *      u64                             ino;
 803          *      u64                             ino_generation;
 804          *      u32                             prot, flags;
 805          *      char                            filename[];
 806          *      struct sample_id                sample_id;
 807          * };
 808          */
 809         PERF_RECORD_MMAP2                       = 10,
 810
 811         /*
 812          * Records that new data landed in the AUX buffer part.
 813          *
 814          * struct {
 815          *      struct perf_event_header        header;
 816          *
 817          *      u64                             aux_offset;
 818          *      u64                             aux_size;
 819          *      u64                             flags;
 820          *      struct sample_id                sample_id;
 821          * };
 822          */
 823         PERF_RECORD_AUX                         = 11,
 824
 825         /*
 826          * Indicates that instruction trace has started
 827          *
 828          * struct {
 829          *      struct perf_event_header        header;
 830          *      u32                             pid;
 831          *      u32                             tid;
 832          * };
 833          */
 834         PERF_RECORD_ITRACE_START                = 12,
 835
 836         /*
 837          * Records the dropped/lost sample number.
 838          *
 839          * struct {
 840          *      struct perf_event_header        header;
 841          *
 842          *      u64                             lost;
 843          *      struct sample_id                sample_id;
 844          * };
 845          */
 846         PERF_RECORD_LOST_SAMPLES                = 13,
 847
 848         /*
 849          * Records a context switch in or out (flagged by
 850          * PERF_RECORD_MISC_SWITCH_OUT). See also
 851          * PERF_RECORD_SWITCH_CPU_WIDE.
 852          *
 853          * struct {
 854          *      struct perf_event_header        header;
 855          *      struct sample_id                sample_id;
 856          * };
 857          */
 858         PERF_RECORD_SWITCH                      = 14,
 859
 860         /*
 861          * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
 862          * next_prev_tid that are the next (switching out) or previous
 863          * (switching in) pid/tid.
 864          *
 865          * struct {
 866          *      struct perf_event_header        header;
 867          *      u32                             next_prev_pid;
 868          *      u32                             next_prev_tid;
 869          *      struct sample_id                sample_id;
 870          * };
 871          */
 872         PERF_RECORD_SWITCH_CPU_WIDE             = 15,
 873
 874         PERF_RECORD_MAX,                        /* non-ABI */
 875 };
 876
 877 #define PERF_MAX_STACK_DEPTH            127
 878 #define PERF_MAX_CONTEXTS_PER_STACK       8
 879
 880 enum perf_callchain_context {
 881         PERF_CONTEXT_HV                 = (__u64)-32,
 882         PERF_CONTEXT_KERNEL             = (__u64)-128,
 883         PERF_CONTEXT_USER               = (__u64)-512,
 884
 885         PERF_CONTEXT_GUEST              = (__u64)-2048,
 886         PERF_CONTEXT_GUEST_KERNEL       = (__u64)-2176,
 887         PERF_CONTEXT_GUEST_USER         = (__u64)-2560,
 888
 889         PERF_CONTEXT_MAX                = (__u64)-4095,
 890 };
 891
 892 /**
 893  * PERF_RECORD_AUX::flags bits
 894  */
 895 #define PERF_AUX_FLAG_TRUNCATED         0x01    /* record was truncated to fit */
 896 #define PERF_AUX_FLAG_OVERWRITE         0x02    /* snapshot from overwrite mode */
 897
 898 #define PERF_FLAG_FD_NO_GROUP           (1UL << 0)
 899 #define PERF_FLAG_FD_OUTPUT             (1UL << 1)
 900 #define PERF_FLAG_PID_CGROUP            (1UL << 2) /* pid=cgroup id, per-cpu mode only */
 901 #define PERF_FLAG_FD_CLOEXEC            (1UL << 3) /* O_CLOEXEC */
 902
 903 union perf_mem_data_src {
 904         __u64 val;
 905         struct {
 906                 __u64   mem_op:5,       /* type of opcode */
 907                         mem_lvl:14,     /* memory hierarchy level */
 908                         mem_snoop:5,    /* snoop mode */
 909                         mem_lock:2,     /* lock instr */
 910                         mem_dtlb:7,     /* tlb access */
 911                         mem_rsvd:31;
 912         };
 913 };
 914
 915 /* type of opcode (load/store/prefetch,code) */
 916 #define PERF_MEM_OP_NA          0x01 /* not available */
 917 #define PERF_MEM_OP_LOAD        0x02 /* load instruction */
 918 #define PERF_MEM_OP_STORE       0x04 /* store instruction */
 919 #define PERF_MEM_OP_PFETCH      0x08 /* prefetch */
 920 #define PERF_MEM_OP_EXEC        0x10 /* code (execution) */
 921 #define PERF_MEM_OP_SHIFT       0
 922
 923 /* memory hierarchy (memory level, hit or miss) */
 924 #define PERF_MEM_LVL_NA         0x01  /* not available */
 925 #define PERF_MEM_LVL_HIT        0x02  /* hit level */
 926 #define PERF_MEM_LVL_MISS       0x04  /* miss level  */
 927 #define PERF_MEM_LVL_L1         0x08  /* L1 */
 928 #define PERF_MEM_LVL_LFB        0x10  /* Line Fill Buffer */
 929 #define PERF_MEM_LVL_L2         0x20  /* L2 */
 930 #define PERF_MEM_LVL_L3         0x40  /* L3 */
 931 #define PERF_MEM_LVL_LOC_RAM    0x80  /* Local DRAM */
 932 #define PERF_MEM_LVL_REM_RAM1   0x100 /* Remote DRAM (1 hop) */
 933 #define PERF_MEM_LVL_REM_RAM2   0x200 /* Remote DRAM (2 hops) */
 934 #define PERF_MEM_LVL_REM_CCE1   0x400 /* Remote Cache (1 hop) */
 935 #define PERF_MEM_LVL_REM_CCE2   0x800 /* Remote Cache (2 hops) */
 936 #define PERF_MEM_LVL_IO         0x1000 /* I/O memory */
 937 #define PERF_MEM_LVL_UNC        0x2000 /* Uncached memory */
 938 #define PERF_MEM_LVL_SHIFT      5
 939
 940 /* snoop mode */
 941 #define PERF_MEM_SNOOP_NA       0x01 /* not available */
 942 #define PERF_MEM_SNOOP_NONE     0x02 /* no snoop */
 943 #define PERF_MEM_SNOOP_HIT      0x04 /* snoop hit */
 944 #define PERF_MEM_SNOOP_MISS     0x08 /* snoop miss */
 945 #define PERF_MEM_SNOOP_HITM     0x10 /* snoop hit modified */
 946 #define PERF_MEM_SNOOP_SHIFT    19
 947
 948 /* locked instruction */
 949 #define PERF_MEM_LOCK_NA        0x01 /* not available */
 950 #define PERF_MEM_LOCK_LOCKED    0x02 /* locked transaction */
 951 #define PERF_MEM_LOCK_SHIFT     24
 952
 953 /* TLB access */
 954 #define PERF_MEM_TLB_NA         0x01 /* not available */
 955 #define PERF_MEM_TLB_HIT        0x02 /* hit level */
 956 #define PERF_MEM_TLB_MISS       0x04 /* miss level */
 957 #define PERF_MEM_TLB_L1         0x08 /* L1 */
 958 #define PERF_MEM_TLB_L2         0x10 /* L2 */
 959 #define PERF_MEM_TLB_WK         0x20 /* Hardware Walker*/
 960 #define PERF_MEM_TLB_OS         0x40 /* OS fault handler */
 961 #define PERF_MEM_TLB_SHIFT      26
 962
 963 #define PERF_MEM_S(a, s) \
 964         (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
 965
 966 /*
 967  * single taken branch record layout:
 968  *
 969  *      from: source instruction (may not always be a branch insn)
 970  *        to: branch target
 971  *   mispred: branch target was mispredicted
 972  * predicted: branch target was predicted
 973  *
 974  * support for mispred, predicted is optional. In case it
 975  * is not supported mispred = predicted = 0.
 976  *
 977  *     in_tx: running in a hardware transaction
 978  *     abort: aborting a hardware transaction
 979  *    cycles: cycles from last branch (or 0 if not supported)
 980  */
 981 struct perf_branch_entry {
 982         __u64   from;
 983         __u64   to;
 984         __u64   mispred:1,  /* target mispredicted */
 985                 predicted:1,/* target predicted */
 986                 in_tx:1,    /* in transaction */
 987                 abort:1,    /* transaction abort */
 988                 cycles:16,  /* cycle count to last branch */
 989                 reserved:44;
 990 };
 991
 992 #endif /* _UAPI_LINUX_PERF_EVENT_H */