/*
 * Performance events x86 architecture header
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 * Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>

#include <asm/fpu/xstate.h>
#include <asm/intel_ds.h>
#include <asm/cpu.h>

/* To enable MSR tracing please use the generic trace points. */

/*
 *          |   NHM/WSM    |      SNB     |
 * register -------------------------------
 *          |  HT  | no HT |  HT  | no HT |
 *-----------------------------------------
 * offcore  | core | core  | cpu  | core  |
 * lbr_sel  | core | core  | cpu  | core  |
 * ld_lat   | cpu  | core  | cpu  | core  |
 *-----------------------------------------
 *
 * Given that there is a small number of shared regs,
 * we can pre-allocate their slot in the per-cpu
 * per-core reg tables.
 */
enum extra_reg_type {
	EXTRA_REG_NONE		= -1, /* not used */

	EXTRA_REG_RSP_0		= 0,  /* offcore_response_0 */
	EXTRA_REG_RSP_1		= 1,  /* offcore_response_1 */
	EXTRA_REG_LBR		= 2,  /* lbr_select */
	EXTRA_REG_LDLAT		= 3,  /* ld_lat_threshold */
	EXTRA_REG_FE		= 4,  /* fe_* */

	EXTRA_REG_MAX		      /* number of entries needed */
};

struct event_constraint {
	union {
		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
		u64		idxmsk64;
	};
	u64		code;
	u64		cmask;
	int		weight;
	int		overlap;
	int		flags;
	unsigned int	size;
};

static inline bool constraint_match(struct event_constraint *c, u64 ecode)
{
	return ((ecode & c->cmask) - c->code) <= (u64)c->size;
}

/*
 * struct hw_perf_event.flags flags
 */
#define PERF_X86_EVENT_PEBS_LDLAT	0x00001 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST		0x00002 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW	0x00004 /* haswell style datala, store */
#define PERF_X86_EVENT_PEBS_LD_HSW	0x00008 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW	0x00010 /* haswell style datala, unknown */
#define PERF_X86_EVENT_EXCL		0x00020 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC		0x00040 /* dynamic alloc'd constraint */

#define PERF_X86_EVENT_EXCL_ACCT	0x00100 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD	0x00200 /* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS	0x00400 /* use large PEBS */
#define PERF_X86_EVENT_PEBS_VIA_PT	0x00800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR		0x01000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT	0x02000 /* Save/Restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN		0x04000 /* Count Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT	0x08000 /* st+stlat data address sampling */
#define PERF_X86_EVENT_AMD_BRS		0x10000 /* AMD Branch Sampling */

static inline bool is_topdown_count(struct perf_event *event)
{
	return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
}

static inline bool is_metric_event(struct perf_event *event)
{
	u64 config = event->attr.config;

	return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
		((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
		((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
}
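
/*
 * Illustration (not an exhaustive definition of the encoding; the
 * INTEL_TD_METRIC_* and INTEL_TD_SLOTS values come from <asm/perf_event.h>):
 * a topdown metric pseudo event keeps the event-select field zero and carries
 * the metric in the umask byte, e.g.
 *
 *	attr.config = INTEL_TD_METRIC_RETIRING;	-> is_metric_event() is true
 *	attr.config = INTEL_TD_SLOTS;		-> is_slots_event() is true (below)
 *
 * Anything with a non-zero event select is a normal counting event.
 */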

static inline bool is_slots_event(struct perf_event *event)
{
	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
}

static inline bool is_topdown_event(struct perf_event *event)
{
	return is_metric_event(event) || is_slots_event(event);
}

struct amd_nb {
	int nb_id;  /* NorthBridge id */
	int refcnt; /* reference count */
	struct perf_event *owners[X86_PMC_IDX_MAX];
	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

#define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
#define PEBS_OUTPUT_OFFSET	61
#define PEBS_OUTPUT_MASK	(3ull << PEBS_OUTPUT_OFFSET)
#define PEBS_OUTPUT_PT		(1ull << PEBS_OUTPUT_OFFSET)
#define PEBS_VIA_PT_MASK	(PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)

/*
 * Flags PEBS can handle without a PMI.
 *
 * TID can only be handled by flushing at context switch.
 * REGS_USER can be handled for events limited to ring 3.
 *
 */
#define LARGE_PEBS_FLAGS \
	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
	PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE)

#define PEBS_GP_REGS			\
	((1ULL << PERF_REG_X86_AX)    | \
	 (1ULL << PERF_REG_X86_BX)    | \
	 (1ULL << PERF_REG_X86_CX)    | \
	 (1ULL << PERF_REG_X86_DX)    | \
	 (1ULL << PERF_REG_X86_DI)    | \
	 (1ULL << PERF_REG_X86_SI)    | \
	 (1ULL << PERF_REG_X86_SP)    | \
	 (1ULL << PERF_REG_X86_BP)    | \
	 (1ULL << PERF_REG_X86_IP)    | \
	 (1ULL << PERF_REG_X86_FLAGS) | \
	 (1ULL << PERF_REG_X86_R8)    | \
	 (1ULL << PERF_REG_X86_R9)    | \
	 (1ULL << PERF_REG_X86_R10)   | \
	 (1ULL << PERF_REG_X86_R11)   | \
	 (1ULL << PERF_REG_X86_R12)   | \
	 (1ULL << PERF_REG_X86_R13)   | \
	 (1ULL << PERF_REG_X86_R14)   | \
	 (1ULL << PERF_REG_X86_R15))

/*
 * Per register state.
 */
struct er_account {
	raw_spinlock_t	lock;	/* per-core: protect structure */
	u64		config;	/* extra MSR config */
	u64		reg;	/* extra MSR number */
	atomic_t	ref;	/* reference count */
};

/*
 * Per core/cpu state
 *
 * Used to coordinate shared registers between HT threads or
 * among events on a single PMU.
 */
struct intel_shared_regs {
	struct er_account regs[EXTRA_REG_MAX];
	int		refcnt;		/* per-core: #HT threads */
	unsigned	core_id;	/* per-core: core id */
};

enum intel_excl_state_type {
	INTEL_EXCL_UNUSED    = 0, /* counter is unused */
	INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
	INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
};

struct intel_excl_states {
	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
	bool sched_started; /* true if scheduling has started */
};

struct intel_excl_cntrs {
	raw_spinlock_t	lock;

	struct intel_excl_states states[2];

	union {
		u16	has_exclusive[2];
		u32	exclusive_present;
	};

	int		refcnt;		/* per-core: #HT threads */
	unsigned	core_id;	/* per-core: core id */
};

struct x86_perf_task_context;
#define MAX_LBR_ENTRIES		32

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_TIME		= 0x06,
	LBR_FORMAT_INFO2	= 0x07,
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_INFO2,
};

enum {
	X86_PERF_KFREE_SHARED = 0,
	X86_PERF_KFREE_EXCL   = 1,
	X86_PERF_KFREE_MAX
};

struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 */
	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;

	int			n_events; /* the # of events in the below arrays */
	int			n_added;  /* the # last events in the below arrays;
					     they've never been enabled yet */
	int			n_txn;    /* the # last events in the below arrays;
					     added in the current transaction */
	int			n_txn_pair;
	int			n_txn_metric;
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];

	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];

	int			n_excl; /* the number of exclusive events */

	unsigned int		txn_flags;
	int			is_fake;

	/*
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	void			*ds_pebs_vaddr;
	void			*ds_bts_vaddr;
	u64			pebs_enabled;
	int			n_pebs;
	int			n_large_pebs;
	int			n_pebs_via_pt;
	int			pebs_output;

	/* Current super set of events hardware configuration */
	u64			pebs_data_cfg;
	u64			active_pebs_data_cfg;
	int			pebs_record_size;

	/*
	 * Intel LBR bits
	 */
	int				lbr_users;
	int				lbr_pebs_users;
	struct perf_branch_stack	lbr_stack;
	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
	union {
		struct er_account	*lbr_sel;
		struct er_account	*lbr_ctl;
	};
	u64				br_sel;
	void				*last_task_ctx;
	int				last_log_id;
	int				lbr_select;
	void				*lbr_xsave;

	/*
	 * Intel host/guest exclude bits
	 */
	u64				intel_ctrl_guest_mask;
	u64				intel_ctrl_host_mask;
	struct perf_guest_switch_msr	guest_switch_msrs[X86_PMC_IDX_MAX];

	/*
	 * Intel checkpoint mask
	 */
	u64				intel_cp_status;

	/*
	 * manage shared (per-core, per-cpu) registers
	 * used on Intel NHM/WSM/SNB
	 */
	struct intel_shared_regs	*shared_regs;
	/*
	 * manage exclusive counter access between hyperthreads
	 */
	struct event_constraint		*constraint_list; /* in enable order */
	struct intel_excl_cntrs		*excl_cntrs;
	int				excl_thread_id; /* 0 or 1 */

	/*
	 * SKL TSX_FORCE_ABORT shadow
	 */
	u64				tfa_shadow;

	/*
	 * Perf Metrics
	 */
	/* number of accepted metrics events */
	int				n_metric;

	/*
	 * AMD specific bits
	 */
	struct amd_nb			*amd_nb;
	int				brs_active; /* BRS is enabled */

	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
	u64				perf_ctr_virt_mask;
	int				n_pair; /* Large increment events */

	void				*kfree_on_online[X86_PERF_KFREE_MAX];

	struct pmu			*pmu;
};

#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) {	\
	{ .idxmsk64 = (n) },		\
	.code = (c),			\
	.size = (e) - (c),		\
	.cmask = (m),			\
	.weight = (w),			\
	.overlap = (o),			\
	.flags = f,			\
}

#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \
	__EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)

#define EVENT_CONSTRAINT(c, n, m) \
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)

/*
 * The constraint_match() function only works for 'simple' event codes
 * and not for extended (AMD64_EVENTSEL_EVENT) event codes.
 */
#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
	__EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)

#define INTEL_EXCLEVT_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
			   0, PERF_X86_EVENT_EXCL)

/*
 * The overlap flag marks event constraints with overlapping counter
 * masks. This is the case if the counter mask of such an event is not
 * a subset of any other counter mask of a constraint with an equal or
 * higher weight, e.g.:
 *
 *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
 *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
 *
 * The event scheduler may not select the correct counter in the first
 * cycle because it needs to know which subsequent events will be
 * scheduled. It may fail to schedule the events then. So we set the
 * overlap flag for such constraints to give the scheduler a hint which
 * events to select for counter rescheduling.
 *
 * Care must be taken as the rescheduling algorithm is O(n!) which
 * will increase scheduling cycles for an over-committed system
 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
 * and their counter masks must be kept to a minimum.
 */
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on a range of Event codes
 */
#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n)	\
	EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  - in_tx
 *  - in_tx_checkpointed
 * The other filters are supported by fixed counters.
 * The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
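
/*
 * Illustrative use of the constraint macros above (the values here are
 * examples; the real tables live in the per-model event lists under
 * events/intel/):
 *
 *	INTEL_EVENT_CONSTRAINT(0x2e, 0x3)
 *		event code 0x2e may only be scheduled on general purpose
 *		counters 0 and 1 (counter bitmask 0x3).
 *
 *	FIXED_EVENT_CONSTRAINT(0x00c0, 0)
 *		map event 0xc0/umask 0x00 (instructions retired) onto fixed
 *		counter 0, i.e. bit 32+0 in the index mask.
 */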

/*
 * The special metric counters do not actually exist. They are calculated from
 * the combination of the FxCtr3 + MSR_PERF_METRICS.
 *
 * The special metric counters are mapped to a dummy offset for the scheduler.
 * The sharing between multiple users of the same metric without multiplexing
 * is not allowed, even though the hardware supports that in principle.
 */

#define METRIC_EVENT_CONSTRAINT(c, n)					\
	EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)),	\
			 INTEL_ARCH_EVENT_MASK)

/*
 * Constraint on the Event code + UMask
 */
#define INTEL_UEVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

/* Constraint on specific umask bit only + event */
#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))

/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)

#define INTEL_EXCLUEVT_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)

#define INTEL_PLD_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

#define INTEL_PSD_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)

#define INTEL_PST_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)

#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n)	\
	EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)

/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n)	\
	EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)

/* Check flags and event code, and set the HSW store flag */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n)	\
	__EVENT_CONSTRAINT(code, n,			\
			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

/* Check flags and event code, and set the HSW load flag */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n)	\
	__EVENT_CONSTRAINT(code, n,			\
			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \
	__EVENT_CONSTRAINT_RANGE(code, end, n,		\
			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, \
			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask, and set the HSW store flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, \
			  PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask, and set the HSW load flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, \
			  PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask, and set the HSW N/A flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)


/*
 * We define the end marker as having a weight of -1
 * to enable blacklisting of events using a counter bitmask
 * of zero and thus a weight of zero.
 * The end marker has a weight that cannot possibly be
 * obtained from counting the bits in the bitmask.
 */
#define EVENT_CONSTRAINT_END { .weight = -1 }

/*
 * Check for end marker with weight == -1
 */
#define for_each_event_constraint(e, c)	\
	for ((e) = (c); (e)->weight != -1; (e)++)

/*
 * Extra registers for specific events.
 *
 * Some events need large masks and require external MSRs.
 * Those extra MSRs end up being shared for all events on
 * a PMU and sometimes between PMUs of sibling HT threads.
 * In either case, the kernel needs to handle conflicting
 * accesses to those extra, shared, regs. The data structure
 * to manage those registers is stored in cpu_hw_event.
 */
struct extra_reg {
	unsigned int		event;
	unsigned int		msr;
	u64			config_mask;
	u64			valid_mask;
	int			idx;  /* per_xxx->regs[] reg index */
	bool			extra_msr_access;
};

#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
	.event = (e),			\
	.msr = (ms),			\
	.config_mask = (m),		\
	.valid_mask = (vm),		\
	.idx = EXTRA_REG_##i,		\
	.extra_msr_access = true,	\
	}

#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)

#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)

#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
	INTEL_UEVENT_EXTRA_REG(c, \
			       MSR_PEBS_LD_LAT_THRESHOLD, \
			       0xffff, \
			       LDLAT)

#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)

union perf_capabilities {
	struct {
		u64	lbr_format:6;
		u64	pebs_trap:1;
		u64	pebs_arch_reg:1;
		u64	pebs_format:4;
		u64	smm_freeze:1;
		/*
		 * PMU supports separate counter range for writing
		 * values > 32bit.
		 */
		u64	full_width_write:1;
		u64	pebs_baseline:1;
		u64	perf_metrics:1;
		u64	pebs_output_pt_available:1;
		u64	anythread_deprecated:1;
	};
	u64	capabilities;
};

struct x86_pmu_quirk {
	struct x86_pmu_quirk *next;
	void (*func)(void);
};

union x86_pmu_config {
	struct {
		u64 event:8,
		    umask:8,
		    usr:1,
		    os:1,
		    edge:1,
		    pc:1,
		    interrupt:1,
		    __reserved1:1,
		    en:1,
		    inv:1,
		    cmask:8,
		    event2:4,
		    __reserved2:4,
		    go:1,
		    ho:1;
	} bits;
	u64 value;
};

#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
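
/*
 * X86_CONFIG() builds a raw EVENTSEL value from the named bit-fields above.
 * A representative (illustrative) use, of the kind seen in the event setup
 * code:
 *
 *	X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16)
 *
 * which packs event 0xc0/umask 0x01 together with the invert bit and a
 * counter mask of 16 into a single u64 config value.
 */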

enum {
	x86_lbr_exclusive_lbr,
	x86_lbr_exclusive_bts,
	x86_lbr_exclusive_pt,
	x86_lbr_exclusive_max,
};

struct x86_hybrid_pmu {
	struct pmu			pmu;
	const char			*name;
	u8				cpu_type;
	cpumask_t			supported_cpus;
	union perf_capabilities		intel_cap;
	u64				intel_ctrl;
	int				max_pebs_events;
	int				num_counters;
	int				num_counters_fixed;
	struct event_constraint		unconstrained;

	u64				hw_cache_event_ids
					[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX];
	u64				hw_cache_extra_regs
					[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX];
	struct event_constraint		*event_constraints;
	struct event_constraint		*pebs_constraints;
	struct extra_reg		*extra_regs;

	unsigned int			late_ack	:1,
					mid_ack		:1,
					enabled_ack	:1;
};

static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
{
	return container_of(pmu, struct x86_hybrid_pmu, pmu);
}

extern struct static_key_false perf_is_hybrid;
#define is_hybrid()	static_branch_unlikely(&perf_is_hybrid)

#define hybrid(_pmu, _field)				\
(*({							\
	typeof(&x86_pmu._field) __Fp = &x86_pmu._field;	\
							\
	if (is_hybrid() && (_pmu))			\
		__Fp = &hybrid_pmu(_pmu)->_field;	\
							\
	__Fp;						\
}))

#define hybrid_var(_pmu, _var)				\
(*({							\
	typeof(&_var) __Fp = &_var;			\
							\
	if (is_hybrid() && (_pmu))			\
		__Fp = &hybrid_pmu(_pmu)->_var;		\
							\
	__Fp;						\
}))

#define hybrid_bit(_pmu, _field)			\
({							\
	bool __Fp = x86_pmu._field;			\
							\
	if (is_hybrid() && (_pmu))			\
		__Fp = hybrid_pmu(_pmu)->_field;	\
							\
	__Fp;						\
})

enum hybrid_pmu_type {
	hybrid_big		= 0x40,
	hybrid_small		= 0x20,

	hybrid_big_small	= hybrid_big | hybrid_small,
};

#define X86_HYBRID_PMU_ATOM_IDX		0
#define X86_HYBRID_PMU_CORE_IDX		1

#define X86_HYBRID_NUM_PMUS		2

/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
	/*
	 * Generic x86 PMC bits
	 */
	const char	*name;
	int		version;
	int		(*handle_irq)(struct pt_regs *);
	void		(*disable_all)(void);
	void		(*enable_all)(int added);
	void		(*enable)(struct perf_event *);
	void		(*disable)(struct perf_event *);
	void		(*assign)(struct perf_event *event, int idx);
	void		(*add)(struct perf_event *);
	void		(*del)(struct perf_event *);
	void		(*read)(struct perf_event *event);
	int		(*hw_config)(struct perf_event *event);
	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
	unsigned	eventsel;
	unsigned	perfctr;
	int		(*addr_offset)(int index, bool eventsel);
	int		(*rdpmc_index)(int index);
	u64		(*event_map)(int);
	int		max_events;
	int		num_counters;
	int		num_counters_fixed;
	int		cntval_bits;
	u64		cntval_mask;
	union {
			unsigned long events_maskl;
			unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
	};
	int		events_mask_len;
	int		apic;
	u64		max_period;
	struct event_constraint *
			(*get_event_constraints)(struct cpu_hw_events *cpuc,
						 int idx,
						 struct perf_event *event);

	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*start_scheduling)(struct cpu_hw_events *cpuc);

	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);

	struct event_constraint *event_constraints;
	struct x86_pmu_quirk *quirks;
	int		perfctr_second_write;
	u64		(*limit_period)(struct perf_event *event, u64 l);

	/* PMI handler bits */
	unsigned int	late_ack		:1,
			mid_ack			:1,
			enabled_ack		:1;
	/*
	 * sysfs attrs
	 */
	int		attr_rdpmc_broken;
	int		attr_rdpmc;
	struct attribute **format_attrs;

	ssize_t		(*events_sysfs_show)(char *page, u64 config);
	const struct attribute_group **attr_update;

	unsigned long	attr_freeze_on_smi;

	/*
	 * CPU Hotplug hooks
	 */
	int		(*cpu_prepare)(int cpu);
	void		(*cpu_starting)(int cpu);
	void		(*cpu_dying)(int cpu);
	void		(*cpu_dead)(int cpu);

	void		(*check_microcode)(void);
	void		(*sched_task)(struct perf_event_context *ctx,
				      bool sched_in);

	/*
	 * Intel Arch Perfmon v2+
	 */
	u64		intel_ctrl;
	union perf_capabilities intel_cap;

	/*
	 * Intel DebugStore bits
	 */
	unsigned int	bts			:1,
			bts_active		:1,
			pebs			:1,
			pebs_active		:1,
			pebs_broken		:1,
			pebs_prec_dist		:1,
			pebs_no_tlb		:1,
			pebs_no_isolation	:1,
			pebs_block		:1;
	int		pebs_record_size;
	int		pebs_buffer_size;
	int		max_pebs_events;
	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
	struct event_constraint *pebs_constraints;
	void		(*pebs_aliases)(struct perf_event *event);
	unsigned long	large_pebs_flags;
	u64		rtm_abort_event;

	/*
	 * Intel LBR
	 */
	unsigned int	lbr_tos, lbr_from, lbr_to,
			lbr_info, lbr_nr;	/* LBR base regs and size */
	union {
		u64	lbr_sel_mask;		/* LBR_SELECT valid bits */
		u64	lbr_ctl_mask;		/* LBR_CTL valid bits */
	};
	union {
		const int	*lbr_sel_map;	/* lbr_select mappings */
		int		*lbr_ctl_map;	/* LBR_CTL mappings */
	};
	bool		lbr_double_abort;	/* duplicated lbr aborts */
	bool		lbr_pt_coexist;		/* (LBR|BTS) may coexist with PT */

	unsigned int	lbr_has_info:1;
	unsigned int	lbr_has_tsx:1;
	unsigned int	lbr_from_flags:1;
	unsigned int	lbr_to_cycles:1;

	/*
	 * Intel Architectural LBR CPUID Enumeration
	 */
	unsigned int	lbr_depth_mask:8;
	unsigned int	lbr_deep_c_reset:1;
	unsigned int	lbr_lip:1;
	unsigned int	lbr_cpl:1;
	unsigned int	lbr_filter:1;
	unsigned int	lbr_call_stack:1;
	unsigned int	lbr_mispred:1;
	unsigned int	lbr_timed_lbr:1;
	unsigned int	lbr_br_type:1;

	void		(*lbr_reset)(void);
	void		(*lbr_read)(struct cpu_hw_events *cpuc);
	void		(*lbr_save)(void *ctx);
	void		(*lbr_restore)(void *ctx);

	/*
	 * Intel PT/LBR/BTS are exclusive
	 */
	atomic_t	lbr_exclusive[x86_lbr_exclusive_max];

	/*
	 * Intel perf metrics
	 */
	int		num_topdown_events;
	u64		(*update_topdown_event)(struct perf_event *event);
	int		(*set_topdown_event_period)(struct perf_event *event);

	/*
	 * perf task context (i.e. struct perf_event_context::task_ctx_data)
	 * switch helper to bridge calls from perf/core to perf/x86.
	 * See struct pmu::swap_task_ctx() usage for examples.
	 */
	void		(*swap_task_ctx)(struct perf_event_context *prev,
					 struct perf_event_context *next);

	/*
	 * AMD bits
	 */
	unsigned int	amd_nb_constraints : 1;
	u64		perf_ctr_pair_en;

	/*
	 * Extra registers for events
	 */
	struct extra_reg *extra_regs;
	unsigned int flags;

	/*
	 * Intel host/guest support (KVM)
	 */
	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);

	/*
	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
	 */
	int (*check_period) (struct perf_event *event, u64 period);

	int (*aux_output_match) (struct perf_event *event);

	int (*filter_match)(struct perf_event *event);
	/*
	 * Hybrid support
	 *
	 * Most PMU capabilities are the same among different hybrid PMUs.
	 * The global x86_pmu saves the architecture capabilities, which
	 * are available for all PMUs. The hybrid_pmu only includes the
	 * unique capabilities.
	 */
	int				num_hybrid_pmus;
	struct x86_hybrid_pmu		*hybrid_pmu;
	u8 (*get_hybrid_cpu_type)	(void);
};

struct x86_perf_task_context_opt {
	int lbr_callstack_users;
	int lbr_stack_state;
	int log_id;
};

struct x86_perf_task_context {
	u64 lbr_sel;
	int tos;
	int valid_lbrs;
	struct x86_perf_task_context_opt opt;
	struct lbr_entry lbr[MAX_LBR_ENTRIES];
};

struct x86_perf_task_context_arch_lbr {
	struct x86_perf_task_context_opt opt;
	struct lbr_entry entries[];
};

/*
 * Add padding to guarantee the 64-byte alignment of the state buffer.
 *
 * The structure is dynamically allocated. The size of the LBR state may vary
 * based on the number of LBR registers.
 *
 * Do not put anything after the LBR state.
 */
struct x86_perf_task_context_arch_lbr_xsave {
	struct x86_perf_task_context_opt	opt;

	union {
		struct xregs_state		xsave;
		struct {
			struct fxregs_state	i387;
			struct xstate_header	header;
			struct arch_lbr_state	lbr;
		} __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
	};
};

#define x86_add_quirk(func_)						\
do {									\
	static struct x86_pmu_quirk __quirk __initdata = {		\
		.func = func_,						\
	};								\
	__quirk.next = x86_pmu.quirks;					\
	x86_pmu.quirks = &__quirk;					\
} while (0)
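
/*
 * Illustrative use of x86_add_quirk() (the real call sites are the model
 * setup paths of the PMU init code, e.g. intel_pmu_init()):
 *
 *	if (boot_cpu_data.x86_model == <affected model>)
 *		x86_add_quirk(some_model_quirk);
 *
 * The registered functions are run once after the PMU has been probed; the
 * list is only ever extended at __init time, hence the __initdata static
 * in the macro above.
 */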

/*
 * x86_pmu flags
 */
#define PMU_FL_NO_HT_SHARING	0x1 /* no hyper-threading resource sharing */
#define PMU_FL_HAS_RSP_1	0x2 /* has 2 equivalent offcore_rsp regs */
#define PMU_FL_EXCL_CNTRS	0x4 /* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
#define PMU_FL_PEBS_ALL		0x10 /* all events are valid PEBS events */
#define PMU_FL_TFA		0x20 /* deal with TSX force abort */
#define PMU_FL_PAIR		0x40 /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY	0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */

#define EVENT_VAR(_id)  event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr

#define EVENT_ATTR(_name, _id)						\
static struct perf_pmu_events_attr EVENT_VAR(_id) = {			\
	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
	.id		= PERF_COUNT_HW_##_id,				\
	.event_str	= NULL,						\
};

#define EVENT_ATTR_STR(_name, v, str)					\
static struct perf_pmu_events_attr event_attr_##v = {			\
	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
	.id		= 0,						\
	.event_str	= str,						\
};

#define EVENT_ATTR_STR_HT(_name, v, noht, ht)				\
static struct perf_pmu_events_ht_attr event_attr_##v = {		\
	.attr		= __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
	.id		= 0,						\
	.event_str_noht	= noht,						\
	.event_str_ht	= ht,						\
}

#define EVENT_ATTR_STR_HYBRID(_name, v, str, _pmu)			\
static struct perf_pmu_events_hybrid_attr event_attr_##v = {		\
	.attr		= __ATTR(_name, 0444, events_hybrid_sysfs_show, NULL),\
	.id		= 0,						\
	.event_str	= str,						\
	.pmu_type	= _pmu,						\
}

#define FORMAT_HYBRID_PTR(_id) (&format_attr_hybrid_##_id.attr.attr)

#define FORMAT_ATTR_HYBRID(_name, _pmu)					\
static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
	.attr		= __ATTR_RO(_name),				\
	.pmu_type	= _pmu,						\
}

struct pmu *x86_get_pmu(unsigned int cpu);
extern struct x86_pmu x86_pmu __read_mostly;

static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt;

	return &((struct x86_perf_task_context *)ctx)->opt;
}

static inline bool x86_pmu_has_lbr_callstack(void)
{
	return x86_pmu.lbr_sel_map &&
	       x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
}

DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

extern u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
extern u64 __read_mostly hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

u64 x86_perf_event_update(struct perf_event *event);

static inline unsigned int x86_pmu_config_addr(int index)
{
	return x86_pmu.eventsel + (x86_pmu.addr_offset ?
				   x86_pmu.addr_offset(index, true) : index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
	return x86_pmu.perfctr + (x86_pmu.addr_offset ?
				  x86_pmu.addr_offset(index, false) : index);
}

static inline int x86_pmu_rdpmc_index(int index)
{
	return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
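
/*
 * Example for the address helpers above (a sketch; the exact MSR layout
 * depends on the PMU and on whether an addr_offset() callback is installed).
 * On a PMU with contiguous counter MSRs, counter 2 uses:
 *
 *	config MSR:  x86_pmu.eventsel + 2  (e.g. MSR_ARCH_PERFMON_EVENTSEL0 + 2)
 *	counter MSR: x86_pmu.perfctr  + 2  (e.g. MSR_ARCH_PERFMON_PERFCTR0  + 2)
 *
 * AMD family 15h+ interleaves PERF_CTL/PERF_CTR pairs, so its addr_offset()
 * hook returns index * 2 instead.
 */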

bool check_hw_exists(struct pmu *pmu, int num_counters,
		     int num_counters_fixed);

int x86_add_exclusive(unsigned int what);

void x86_del_exclusive(unsigned int what);

int x86_reserve_hardware(void);

void x86_release_hardware(void);

int x86_pmu_max_precise(void);

void hw_perf_lbr_event_destroy(struct perf_event *event);

int x86_setup_perfctr(struct perf_event *event);

int x86_pmu_hw_config(struct perf_event *event);

void x86_pmu_disable_all(void);

static inline bool has_amd_brs(struct hw_perf_event *hwc)
{
	return hwc->flags & PERF_X86_EVENT_AMD_BRS;
}

static inline bool is_counter_pair(struct hw_perf_event *hwc)
{
	return hwc->flags & PERF_X86_EVENT_PAIR;
}

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
					  u64 enable_mask)
{
	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);

	if (hwc->extra_reg.reg)
		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);

	/*
	 * Add enabled Merge event on next counter
	 * if a large increment event is being enabled on this counter
	 */
	if (is_counter_pair(hwc))
		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);

	wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}

void x86_pmu_enable_all(int added);

int perf_assign_events(struct event_constraint **constraints, int n,
		       int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

void x86_pmu_stop(struct perf_event *event, int flags);

static inline void x86_pmu_disable_event(struct perf_event *event)
{
	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config & ~disable_mask);

	if (is_counter_pair(hwc))
		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
}

void x86_pmu_enable_event(struct perf_event *event);

int x86_pmu_handle_irq(struct pt_regs *regs);

void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
			  u64 intel_ctrl);

void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);

extern struct event_constraint emptyconstraint;

extern struct event_constraint unconstrained;

static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
	return ip > PAGE_OFFSET;
#else
	return (long)ip < 0;
#endif
}
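
/*
 * kernel_ip() illustration (assumes the standard x86_64 layout): kernel text
 * and modules live in the upper canonical half, so an address such as
 * 0xffffffff81000000 is negative as a signed long and reported as kernel,
 * while a user address such as 0x00007f0000001000 is not.
 */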

/*
 * Not all PMUs provide the right context information to place the reported IP
 * into full context. Specifically segment registers are typically not
 * supplied.
 *
 * Assuming the address is a linear address (it is for IBS), we fake the CS and
 * vm86 mode using the known zero-based code segment and 'fix up' the registers
 * to reflect this.
 *
 * Intel PEBS/LBR appear to typically provide the effective address, nothing
 * much we can do about that but pray and treat it like a linear address.
 */
static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
{
	regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
	if (regs->flags & X86_VM_MASK)
		regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
	regs->ip = ip;
}

ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
ssize_t intel_event_sysfs_show(char *page, u64 config);

ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
			  char *page);
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
			  char *page);
ssize_t events_hybrid_sysfs_show(struct device *dev,
				 struct device_attribute *attr,
				 char *page);

static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
{
	u64 intel_ctrl = hybrid(pmu, intel_ctrl);

	return !(intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
}

#ifdef CONFIG_CPU_SUP_AMD

int amd_pmu_init(void);

#ifdef CONFIG_PERF_EVENTS_AMD_BRS
int amd_brs_init(void);
void amd_brs_disable(void);
void amd_brs_enable(void);
void amd_brs_enable_all(void);
void amd_brs_disable_all(void);
void amd_brs_drain(void);
void amd_brs_lopwr_init(void);
int amd_brs_setup_filter(struct perf_event *event);
void amd_brs_reset(void);

static inline void amd_pmu_brs_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	perf_sched_cb_inc(event->ctx->pmu);
	cpuc->lbr_users++;
	/*
	 * No need to reset BRS because it is reset
	 * on brs_enable() and it is saturating
	 */
}

static inline void amd_pmu_brs_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);

	perf_sched_cb_dec(event->ctx->pmu);
}

void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
#else
static inline int amd_brs_init(void)
{
	return 0;
}
static inline void amd_brs_disable(void) {}
static inline void amd_brs_enable(void) {}
static inline void amd_brs_drain(void) {}
static inline void amd_brs_lopwr_init(void) {}
static inline void amd_brs_disable_all(void) {}
static inline int amd_brs_setup_filter(struct perf_event *event)
{
	return 0;
}
static inline void amd_brs_reset(void) {}

static inline void amd_pmu_brs_add(struct perf_event *event)
{
}

static inline void amd_pmu_brs_del(struct perf_event *event)
{
}

static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
}

static inline void amd_brs_enable_all(void)
{
}

#endif

#else /* CONFIG_CPU_SUP_AMD */

static inline int amd_pmu_init(void)
{
	return 0;
}

static inline int amd_brs_init(void)
{
	return -EOPNOTSUPP;
}

static inline void amd_brs_drain(void)
{
}

static inline void amd_brs_enable_all(void)
{
}

static inline void amd_brs_disable_all(void)
{
}
#endif /* CONFIG_CPU_SUP_AMD */

static inline int is_pebs_pt(struct perf_event *event)
{
	return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT);
}

#ifdef CONFIG_CPU_SUP_INTEL

static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int hw_event, bts_event;

	if (event->attr.freq)
		return false;

	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	return hw_event == bts_event && period == 1;
}

static inline bool intel_pmu_has_bts(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	return intel_pmu_has_bts_period(event, hwc->sample_period);
}

static __always_inline void __intel_pmu_pebs_disable_all(void)
{
	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

static __always_inline void __intel_pmu_arch_lbr_disable(void)
{
	wrmsrl(MSR_ARCH_LBR_CTL, 0);
}

static __always_inline void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

int intel_pmu_save_and_restart(struct perf_event *event);

struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event);

extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu);
extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);

int intel_pmu_init(void);

void init_debug_store_on_cpu(int cpu);

void fini_debug_store_on_cpu(int cpu);

void release_ds_buffers(void);

void reserve_ds_buffers(void);

void release_lbr_buffers(void);

void reserve_lbr_buffers(void);

extern struct event_constraint bts_constraint;
extern struct event_constraint vlbr_constraint;

void intel_pmu_enable_bts(u64 config);

void intel_pmu_disable_bts(void);

int intel_pmu_drain_bts_buffer(void);

extern struct event_constraint intel_core2_pebs_event_constraints[];

extern struct event_constraint intel_atom_pebs_event_constraints[];

extern struct event_constraint intel_slm_pebs_event_constraints[];

extern struct event_constraint intel_glm_pebs_event_constraints[];

extern struct event_constraint intel_glp_pebs_event_constraints[];

extern struct event_constraint intel_grt_pebs_event_constraints[];

extern struct event_constraint intel_nehalem_pebs_event_constraints[];

extern struct event_constraint intel_westmere_pebs_event_constraints[];

extern struct event_constraint intel_snb_pebs_event_constraints[];

extern struct event_constraint intel_ivb_pebs_event_constraints[];

extern struct event_constraint intel_hsw_pebs_event_constraints[];

extern struct event_constraint intel_bdw_pebs_event_constraints[];

extern struct event_constraint intel_skl_pebs_event_constraints[];

extern struct event_constraint intel_icl_pebs_event_constraints[];

extern struct event_constraint intel_spr_pebs_event_constraints[];

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

void intel_pmu_pebs_add(struct perf_event *event);

void intel_pmu_pebs_del(struct perf_event *event);

void intel_pmu_pebs_enable(struct perf_event *event);

void intel_pmu_pebs_disable(struct perf_event *event);

void intel_pmu_pebs_enable_all(void);

void intel_pmu_pebs_disable_all(void);

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);

void intel_pmu_auto_reload_read(struct perf_event *event);

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);

void intel_ds_init(void);

void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
				 struct perf_event_context *next);

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);

u64 lbr_from_signext_quirk_wr(u64 val);

void intel_pmu_lbr_reset(void);

void intel_pmu_lbr_reset_32(void);

void intel_pmu_lbr_reset_64(void);

void intel_pmu_lbr_add(struct perf_event *event);

void intel_pmu_lbr_del(struct perf_event *event);

void intel_pmu_lbr_enable_all(bool pmi);

void intel_pmu_lbr_disable_all(void);

void intel_pmu_lbr_read(void);

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc);

void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc);

void intel_pmu_lbr_save(void *ctx);

void intel_pmu_lbr_restore(void *ctx);

void intel_pmu_lbr_init_core(void);

void intel_pmu_lbr_init_nhm(void);

void intel_pmu_lbr_init_atom(void);

void intel_pmu_lbr_init_slm(void);

void intel_pmu_lbr_init_snb(void);

void intel_pmu_lbr_init_hsw(void);

void intel_pmu_lbr_init_skl(void);

void intel_pmu_lbr_init_knl(void);

void intel_pmu_lbr_init(void);

void intel_pmu_arch_lbr_init(void);

void intel_pmu_pebs_data_source_nhm(void);

void intel_pmu_pebs_data_source_skl(bool pmem);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

void intel_pt_interrupt(void);

int intel_bts_interrupt(void);

void intel_bts_enable_local(void);

void intel_bts_disable_local(void);

int p4_pmu_init(void);

int p6_pmu_init(void);

int knc_pmu_init(void);

static inline int is_ht_workaround_enabled(void)
{
	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
}

#else /* CONFIG_CPU_SUP_INTEL */

static inline void reserve_ds_buffers(void)
{
}

static inline void release_ds_buffers(void)
{
}

static inline void release_lbr_buffers(void)
{
}

static inline void reserve_lbr_buffers(void)
{
}

static inline int intel_pmu_init(void)
{
	return 0;
}

static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
	return 0;
}

static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc)
{
}

static inline int is_ht_workaround_enabled(void)
{
	return 0;
}
#endif /* CONFIG_CPU_SUP_INTEL */

#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN))
int zhaoxin_pmu_init(void);
#else
static inline int zhaoxin_pmu_init(void)
{
	return 0;
}
#endif /*CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN*/