cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

core.c (188667B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Per core/cpu state
      4 *
      5 * Used to coordinate shared registers between HT threads or
      6 * among events on a single PMU.
      7 */
      8
      9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     10
     11#include <linux/stddef.h>
     12#include <linux/types.h>
     13#include <linux/init.h>
     14#include <linux/slab.h>
     15#include <linux/export.h>
     16#include <linux/nmi.h>
     17
     18#include <asm/cpufeature.h>
     19#include <asm/hardirq.h>
     20#include <asm/intel-family.h>
     21#include <asm/intel_pt.h>
     22#include <asm/apic.h>
     23#include <asm/cpu_device_id.h>
     24
     25#include "../perf_event.h"
     26
     27/*
     28 * Intel PerfMon, used on Core and later.
     29 */
     30static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
     31{
     32	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
     33	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
     34	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
     35	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
     36	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
     37	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
     38	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
     39	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
     40};
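       /*
        * Editorial note, not in the upstream file: the map values above use
        * the usual PERFEVTSEL encoding of (unit mask << 8) | event select.
        * For example, 0x412e is event 0x2e (LONGEST_LAT_CACHE) with umask
        * 0x41 (.MISS) and 0x4f2e is the same event with umask 0x4f
        * (.REFERENCE), while 0x0300 is the pseudo-encoding called out in
        * the comment above and is handled specially for the fixed
        * reference-cycles counter.
        */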
     41
     42static struct event_constraint intel_core_event_constraints[] __read_mostly =
     43{
     44	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
     45	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
     46	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
     47	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
     48	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
     49	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
     50	EVENT_CONSTRAINT_END
     51};
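       /*
        * Editorial note, not in the upstream file: in the constraint tables
        * that follow, the second argument of INTEL_EVENT_CONSTRAINT() and
        * INTEL_UEVENT_CONSTRAINT() is a bitmask of the general-purpose
        * counters the event may be scheduled on (0x1 = counter 0 only,
        * 0x3 = counters 0-1, 0xf = counters 0-3, 0xff = counters 0-7),
        * while FIXED_EVENT_CONSTRAINT(code, n) pins the pseudo-encoded
        * event to fixed counter n.  See the macro definitions in
        * ../perf_event.h for the exact encoding.
        */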
     52
     53static struct event_constraint intel_core2_event_constraints[] __read_mostly =
     54{
     55	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
     56	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
     57	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
     58	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
     59	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
     60	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
     61	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
     62	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
     63	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
     64	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
     65	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
     66	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
     67	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
     68	EVENT_CONSTRAINT_END
     69};
     70
     71static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
     72{
     73	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
     74	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
     75	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
     76	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
     77	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
     78	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
     79	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
     80	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
     81	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
     82	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
     83	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
     84	EVENT_CONSTRAINT_END
     85};
     86
     87static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
     88{
     89	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
     90	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
     91	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
     92	EVENT_EXTRA_END
     93};
     94
     95static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
     96{
     97	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
     98	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
     99	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    100	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
    101	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
    102	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
    103	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
    104	EVENT_CONSTRAINT_END
    105};
    106
    107static struct event_constraint intel_snb_event_constraints[] __read_mostly =
    108{
    109	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    110	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    111	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    112	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
    113	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
    114	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
    115	INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
    116	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
    117	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
    118	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
    119	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
    120	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
    121
    122	/*
    123	 * When HT is off these events can only run on the bottom 4 counters
    124	 * When HT is on, they are impacted by the HT bug and require EXCL access
    125	 */
    126	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
    127	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
    128	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
    129	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
    130
    131	EVENT_CONSTRAINT_END
    132};
    133
    134static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
    135{
    136	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    137	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    138	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    139	INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
    140	INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
    141	INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
    142	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
    143	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
    144	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
    145	INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
    146	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
    147	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
    148	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
    149
    150	/*
    151	 * When HT is off these events can only run on the bottom 4 counters
    152	 * When HT is on, they are impacted by the HT bug and require EXCL access
    153	 */
    154	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
    155	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
    156	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
    157	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
    158
    159	EVENT_CONSTRAINT_END
    160};
    161
    162static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
    163{
    164	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
    165	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
    166	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
    167	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
    168	EVENT_EXTRA_END
    169};
    170
    171static struct event_constraint intel_v1_event_constraints[] __read_mostly =
    172{
    173	EVENT_CONSTRAINT_END
    174};
    175
    176static struct event_constraint intel_gen_event_constraints[] __read_mostly =
    177{
    178	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    179	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    180	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    181	EVENT_CONSTRAINT_END
    182};
    183
    184static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly =
    185{
    186	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    187	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    188	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    189	FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
    190	FIXED_EVENT_CONSTRAINT(0x0500, 4),
    191	FIXED_EVENT_CONSTRAINT(0x0600, 5),
    192	FIXED_EVENT_CONSTRAINT(0x0700, 6),
    193	FIXED_EVENT_CONSTRAINT(0x0800, 7),
    194	FIXED_EVENT_CONSTRAINT(0x0900, 8),
    195	FIXED_EVENT_CONSTRAINT(0x0a00, 9),
    196	FIXED_EVENT_CONSTRAINT(0x0b00, 10),
    197	FIXED_EVENT_CONSTRAINT(0x0c00, 11),
    198	FIXED_EVENT_CONSTRAINT(0x0d00, 12),
    199	FIXED_EVENT_CONSTRAINT(0x0e00, 13),
    200	FIXED_EVENT_CONSTRAINT(0x0f00, 14),
    201	FIXED_EVENT_CONSTRAINT(0x1000, 15),
    202	EVENT_CONSTRAINT_END
    203};
    204
    205static struct event_constraint intel_slm_event_constraints[] __read_mostly =
    206{
    207	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    208	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    209	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
    210	EVENT_CONSTRAINT_END
    211};
    212
    213static struct event_constraint intel_skl_event_constraints[] = {
    214	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
    215	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
    216	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
    217	INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
    218
    219	/*
    220	 * when HT is off, these can only run on the bottom 4 counters
    221	 */
    222	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),	/* MEM_INST_RETIRED.* */
    223	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),	/* MEM_LOAD_RETIRED.* */
    224	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),	/* MEM_LOAD_L3_HIT_RETIRED.* */
    225	INTEL_EVENT_CONSTRAINT(0xcd, 0xf),	/* MEM_TRANS_RETIRED.* */
    226	INTEL_EVENT_CONSTRAINT(0xc6, 0xf),	/* FRONTEND_RETIRED.* */
    227
    228	EVENT_CONSTRAINT_END
    229};
    230
    231static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
    232	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
    233	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
    234	EVENT_EXTRA_END
    235};
    236
    237static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
    238	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
    239	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
    240	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
    241	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
    242	EVENT_EXTRA_END
    243};
    244
    245static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
    246	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
    247	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
    248	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
    249	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
    250	EVENT_EXTRA_END
    251};
    252
    253static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
    254	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
    255	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
    256	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
    257	/*
    258	 * Note: the low 8 bits of the eventsel code are not a contiguous field;
    259	 * they contain some bits that #GP if set. These are masked out.
    260	 */
    261	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
    262	EVENT_EXTRA_END
    263};
    264
    265static struct event_constraint intel_icl_event_constraints[] = {
    266	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
    267	FIXED_EVENT_CONSTRAINT(0x01c0, 0),	/* old INST_RETIRED.PREC_DIST */
    268	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
    269	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
    270	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
    271	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
    272	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
    273	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
    274	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
    275	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
    276	INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
    277	INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
    278	INTEL_EVENT_CONSTRAINT(0x32, 0xf),	/* SW_PREFETCH_ACCESS.* */
    279	INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
    280	INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
    281	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_TOTAL */
    282	INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff),  /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
    283	INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
    284	INTEL_EVENT_CONSTRAINT(0xa3, 0xf),      /* CYCLE_ACTIVITY.* */
    285	INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
    286	INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
    287	INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
    288	INTEL_EVENT_CONSTRAINT(0xef, 0xf),
    289	INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
    290	EVENT_CONSTRAINT_END
    291};
    292
    293static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
    294	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
    295	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
    296	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
    297	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
    298	EVENT_EXTRA_END
    299};
    300
    301static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
    302	INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
    303	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
    304	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
    305	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
    306	INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
    307	INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
    308	EVENT_EXTRA_END
    309};
    310
    311static struct event_constraint intel_spr_event_constraints[] = {
    312	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
    313	FIXED_EVENT_CONSTRAINT(0x0100, 0),	/* INST_RETIRED.PREC_DIST */
    314	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
    315	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
    316	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
    317	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
    318	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
    319	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
    320	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
    321	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4),
    322	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5),
    323	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
    324	METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
    325
    326	INTEL_EVENT_CONSTRAINT(0x2e, 0xff),
    327	INTEL_EVENT_CONSTRAINT(0x3c, 0xff),
    328	/*
    329	 * Generally event codes < 0x90 are restricted to counters 0-3.
    330	 * Events 0x2E and 0x3C are exceptions and have no restriction.
    331	 */
    332	INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
    333
    334	INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
    335	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
    336	INTEL_UEVENT_CONSTRAINT(0x08a3, 0xf),
    337	INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
    338	INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
    339	INTEL_UEVENT_CONSTRAINT(0x02cd, 0x1),
    340	INTEL_EVENT_CONSTRAINT(0xce, 0x1),
    341	INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
    342	/*
    343	 * Generally event codes >= 0x90 are likely to have no restrictions.
    344	 * The exceptions are defined above.
    345	 */
    346	INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0xff),
    347
    348	EVENT_CONSTRAINT_END
    349};
    350
    351
    352EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
    353EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
    354EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
    355
    356static struct attribute *nhm_mem_events_attrs[] = {
    357	EVENT_PTR(mem_ld_nhm),
    358	NULL,
    359};
    360
    361/*
    362 * topdown events for Intel Core CPUs.
    363 *
    364 * The events are all measured in slots; a slot is a free issue slot
    365 * in a 4-wide pipeline. Some events are already reported in slots;
    366 * for cycle events we multiply by the pipeline width (4).
    367 *
    368 * With Hyper Threading on, topdown metrics are either summed or averaged
    369 * between the threads of a core: (count_t0 + count_t1).
    370 *
    371 * For the average case the metric is always scaled to pipeline width,
    372 * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
    373 */
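       /*
        * Editorial note, not in the upstream file, spelling out the scaling
        * above: a thread that is unhalted for C cycles on a 4-wide pipeline
        * has 4 * C slots, hence topdown-total-slots.scale is "4" without HT.
        * With HT on, the tool effectively sees (count_t0 + count_t1), and the
        * per-core slot count is (count_t0 + count_t1) / 2 * 4, i.e. the sum
        * times 2, hence the HT scale string of "2".
        */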
    374
    375EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
    376	"event=0x3c,umask=0x0",			/* cpu_clk_unhalted.thread */
    377	"event=0x3c,umask=0x0,any=1");		/* cpu_clk_unhalted.thread_any */
    378EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
    379EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
    380	"event=0xe,umask=0x1");			/* uops_issued.any */
    381EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
    382	"event=0xc2,umask=0x2");		/* uops_retired.retire_slots */
    383EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
    384	"event=0x9c,umask=0x1");		/* idq_uops_not_delivered_core */
    385EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
    386	"event=0xd,umask=0x3,cmask=1",		/* int_misc.recovery_cycles */
    387	"event=0xd,umask=0x3,cmask=1,any=1");	/* int_misc.recovery_cycles_any */
    388EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
    389	"4", "2");
    390
    391EVENT_ATTR_STR(slots,			slots,			"event=0x00,umask=0x4");
    392EVENT_ATTR_STR(topdown-retiring,	td_retiring,		"event=0x00,umask=0x80");
    393EVENT_ATTR_STR(topdown-bad-spec,	td_bad_spec,		"event=0x00,umask=0x81");
    394EVENT_ATTR_STR(topdown-fe-bound,	td_fe_bound,		"event=0x00,umask=0x82");
    395EVENT_ATTR_STR(topdown-be-bound,	td_be_bound,		"event=0x00,umask=0x83");
    396EVENT_ATTR_STR(topdown-heavy-ops,	td_heavy_ops,		"event=0x00,umask=0x84");
    397EVENT_ATTR_STR(topdown-br-mispredict,	td_br_mispredict,	"event=0x00,umask=0x85");
    398EVENT_ATTR_STR(topdown-fetch-lat,	td_fetch_lat,		"event=0x00,umask=0x86");
    399EVENT_ATTR_STR(topdown-mem-bound,	td_mem_bound,		"event=0x00,umask=0x87");
    400
    401static struct attribute *snb_events_attrs[] = {
    402	EVENT_PTR(td_slots_issued),
    403	EVENT_PTR(td_slots_retired),
    404	EVENT_PTR(td_fetch_bubbles),
    405	EVENT_PTR(td_total_slots),
    406	EVENT_PTR(td_total_slots_scale),
    407	EVENT_PTR(td_recovery_bubbles),
    408	EVENT_PTR(td_recovery_bubbles_scale),
    409	NULL,
    410};
    411
    412static struct attribute *snb_mem_events_attrs[] = {
    413	EVENT_PTR(mem_ld_snb),
    414	EVENT_PTR(mem_st_snb),
    415	NULL,
    416};
    417
    418static struct event_constraint intel_hsw_event_constraints[] = {
    419	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    420	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    421	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    422	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
    423	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
    424	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
    425	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
    426	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
    427	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
    428	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
    429	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
    430	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
    431
    432	/*
    433	 * When HT is off these events can only run on the bottom 4 counters
    434	 * When HT is on, they are impacted by the HT bug and require EXCL access
    435	 */
    436	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
    437	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
    438	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
    439	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
    440
    441	EVENT_CONSTRAINT_END
    442};
    443
    444static struct event_constraint intel_bdw_event_constraints[] = {
    445	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
    446	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
    447	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
    448	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
    449	INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),	/* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
    450	/*
    451	 * when HT is off, these can only run on the bottom 4 counters
    452	 */
    453	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),	/* MEM_INST_RETIRED.* */
    454	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),	/* MEM_LOAD_RETIRED.* */
    455	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),	/* MEM_LOAD_L3_HIT_RETIRED.* */
    456	INTEL_EVENT_CONSTRAINT(0xcd, 0xf),	/* MEM_TRANS_RETIRED.* */
    457	EVENT_CONSTRAINT_END
    458};
    459
    460static u64 intel_pmu_event_map(int hw_event)
    461{
    462	return intel_perfmon_event_map[hw_event];
    463}
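       /*
        * Illustrative sketch, not part of the original file:
        * intel_pmu_event_map() is wired up as the x86_pmu.event_map() hook
        * further down in the full file, so a generic PERF_TYPE_HARDWARE id
        * becomes a raw PERFEVTSEL value via a simple table lookup, e.g.:
        */
       #if 0	/* example only, never built */
	u64 config = intel_pmu_event_map(PERF_COUNT_HW_BRANCH_MISSES);
	/* config == 0x00c5: BR_MISP_RETIRED.ALL_BRANCHES, umask 0x00 */
       #endif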
    464
    465static __initconst const u64 spr_hw_cache_event_ids
    466				[PERF_COUNT_HW_CACHE_MAX]
    467				[PERF_COUNT_HW_CACHE_OP_MAX]
    468				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    469{
    470 [ C(L1D ) ] = {
    471	[ C(OP_READ) ] = {
    472		[ C(RESULT_ACCESS) ] = 0x81d0,
    473		[ C(RESULT_MISS)   ] = 0xe124,
    474	},
    475	[ C(OP_WRITE) ] = {
    476		[ C(RESULT_ACCESS) ] = 0x82d0,
    477	},
    478 },
    479 [ C(L1I ) ] = {
    480	[ C(OP_READ) ] = {
    481		[ C(RESULT_MISS)   ] = 0xe424,
    482	},
    483	[ C(OP_WRITE) ] = {
    484		[ C(RESULT_ACCESS) ] = -1,
    485		[ C(RESULT_MISS)   ] = -1,
    486	},
    487 },
    488 [ C(LL  ) ] = {
    489	[ C(OP_READ) ] = {
    490		[ C(RESULT_ACCESS) ] = 0x12a,
    491		[ C(RESULT_MISS)   ] = 0x12a,
    492	},
    493	[ C(OP_WRITE) ] = {
    494		[ C(RESULT_ACCESS) ] = 0x12a,
    495		[ C(RESULT_MISS)   ] = 0x12a,
    496	},
    497 },
    498 [ C(DTLB) ] = {
    499	[ C(OP_READ) ] = {
    500		[ C(RESULT_ACCESS) ] = 0x81d0,
    501		[ C(RESULT_MISS)   ] = 0xe12,
    502	},
    503	[ C(OP_WRITE) ] = {
    504		[ C(RESULT_ACCESS) ] = 0x82d0,
    505		[ C(RESULT_MISS)   ] = 0xe13,
    506	},
    507 },
    508 [ C(ITLB) ] = {
    509	[ C(OP_READ) ] = {
    510		[ C(RESULT_ACCESS) ] = -1,
    511		[ C(RESULT_MISS)   ] = 0xe11,
    512	},
    513	[ C(OP_WRITE) ] = {
    514		[ C(RESULT_ACCESS) ] = -1,
    515		[ C(RESULT_MISS)   ] = -1,
    516	},
    517	[ C(OP_PREFETCH) ] = {
    518		[ C(RESULT_ACCESS) ] = -1,
    519		[ C(RESULT_MISS)   ] = -1,
    520	},
    521 },
    522 [ C(BPU ) ] = {
    523	[ C(OP_READ) ] = {
    524		[ C(RESULT_ACCESS) ] = 0x4c4,
    525		[ C(RESULT_MISS)   ] = 0x4c5,
    526	},
    527	[ C(OP_WRITE) ] = {
    528		[ C(RESULT_ACCESS) ] = -1,
    529		[ C(RESULT_MISS)   ] = -1,
    530	},
    531	[ C(OP_PREFETCH) ] = {
    532		[ C(RESULT_ACCESS) ] = -1,
    533		[ C(RESULT_MISS)   ] = -1,
    534	},
    535 },
    536 [ C(NODE) ] = {
    537	[ C(OP_READ) ] = {
    538		[ C(RESULT_ACCESS) ] = 0x12a,
    539		[ C(RESULT_MISS)   ] = 0x12a,
    540	},
    541 },
    542};
    543
    544static __initconst const u64 spr_hw_cache_extra_regs
    545				[PERF_COUNT_HW_CACHE_MAX]
    546				[PERF_COUNT_HW_CACHE_OP_MAX]
    547				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    548{
    549 [ C(LL  ) ] = {
    550	[ C(OP_READ) ] = {
    551		[ C(RESULT_ACCESS) ] = 0x10001,
    552		[ C(RESULT_MISS)   ] = 0x3fbfc00001,
    553	},
    554	[ C(OP_WRITE) ] = {
    555		[ C(RESULT_ACCESS) ] = 0x3f3ffc0002,
    556		[ C(RESULT_MISS)   ] = 0x3f3fc00002,
    557	},
    558 },
    559 [ C(NODE) ] = {
    560	[ C(OP_READ) ] = {
    561		[ C(RESULT_ACCESS) ] = 0x10c000001,
    562		[ C(RESULT_MISS)   ] = 0x3fb3000001,
    563	},
    564 },
    565};
    566
    567/*
    568 * Notes on the events:
    569 * - data reads do not include code reads (comparable to earlier tables)
    570 * - data counts include speculative execution (except L1 write, dtlb, bpu)
    571 * - remote node access includes remote memory, remote cache, remote mmio.
    572 * - prefetches are not included in the counts.
    573 * - icache miss does not include decoded icache
    574 */
    575
    576#define SKL_DEMAND_DATA_RD		BIT_ULL(0)
    577#define SKL_DEMAND_RFO			BIT_ULL(1)
    578#define SKL_ANY_RESPONSE		BIT_ULL(16)
    579#define SKL_SUPPLIER_NONE		BIT_ULL(17)
    580#define SKL_L3_MISS_LOCAL_DRAM		BIT_ULL(26)
    581#define SKL_L3_MISS_REMOTE_HOP0_DRAM	BIT_ULL(27)
    582#define SKL_L3_MISS_REMOTE_HOP1_DRAM	BIT_ULL(28)
    583#define SKL_L3_MISS_REMOTE_HOP2P_DRAM	BIT_ULL(29)
    584#define SKL_L3_MISS			(SKL_L3_MISS_LOCAL_DRAM| \
    585					 SKL_L3_MISS_REMOTE_HOP0_DRAM| \
    586					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
    587					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
    588#define SKL_SPL_HIT			BIT_ULL(30)
    589#define SKL_SNOOP_NONE			BIT_ULL(31)
    590#define SKL_SNOOP_NOT_NEEDED		BIT_ULL(32)
    591#define SKL_SNOOP_MISS			BIT_ULL(33)
    592#define SKL_SNOOP_HIT_NO_FWD		BIT_ULL(34)
    593#define SKL_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
    594#define SKL_SNOOP_HITM			BIT_ULL(36)
    595#define SKL_SNOOP_NON_DRAM		BIT_ULL(37)
    596#define SKL_ANY_SNOOP			(SKL_SPL_HIT|SKL_SNOOP_NONE| \
    597					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
    598					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
    599					 SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
    600#define SKL_DEMAND_READ			SKL_DEMAND_DATA_RD
    601#define SKL_SNOOP_DRAM			(SKL_SNOOP_NONE| \
    602					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
    603					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
    604					 SKL_SNOOP_HITM|SKL_SPL_HIT)
    605#define SKL_DEMAND_WRITE		SKL_DEMAND_RFO
    606#define SKL_LLC_ACCESS			SKL_ANY_RESPONSE
    607#define SKL_L3_MISS_REMOTE		(SKL_L3_MISS_REMOTE_HOP0_DRAM| \
    608					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
    609					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
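       /*
        * Editorial note, not in the upstream file: these SKL_* bits are OR-ed
        * together below to form raw MSR_OFFCORE_RSP_x values.  They only
        * apply to cache-table entries programmed as 0x1b7/0x1bb
        * (OFFCORE_RESPONSE); intel_skl_extra_regs[] above names the backing
        * MSR and the valid-bit mask for each of those events.  For example,
        * the LL/OP_READ/RESULT_ACCESS entry in skl_hw_cache_extra_regs is
        * SKL_DEMAND_READ | SKL_LLC_ACCESS | SKL_ANY_SNOOP, i.e. demand data
        * reads with any response and any snoop outcome.
        */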
    610
    611static __initconst const u64 skl_hw_cache_event_ids
    612				[PERF_COUNT_HW_CACHE_MAX]
    613				[PERF_COUNT_HW_CACHE_OP_MAX]
    614				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    615{
    616 [ C(L1D ) ] = {
    617	[ C(OP_READ) ] = {
    618		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
    619		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
    620	},
    621	[ C(OP_WRITE) ] = {
    622		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
    623		[ C(RESULT_MISS)   ] = 0x0,
    624	},
    625	[ C(OP_PREFETCH) ] = {
    626		[ C(RESULT_ACCESS) ] = 0x0,
    627		[ C(RESULT_MISS)   ] = 0x0,
    628	},
    629 },
    630 [ C(L1I ) ] = {
    631	[ C(OP_READ) ] = {
    632		[ C(RESULT_ACCESS) ] = 0x0,
    633		[ C(RESULT_MISS)   ] = 0x283,	/* ICACHE_64B.MISS */
    634	},
    635	[ C(OP_WRITE) ] = {
    636		[ C(RESULT_ACCESS) ] = -1,
    637		[ C(RESULT_MISS)   ] = -1,
    638	},
    639	[ C(OP_PREFETCH) ] = {
    640		[ C(RESULT_ACCESS) ] = 0x0,
    641		[ C(RESULT_MISS)   ] = 0x0,
    642	},
    643 },
    644 [ C(LL  ) ] = {
    645	[ C(OP_READ) ] = {
    646		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
    647		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
    648	},
    649	[ C(OP_WRITE) ] = {
    650		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
    651		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
    652	},
    653	[ C(OP_PREFETCH) ] = {
    654		[ C(RESULT_ACCESS) ] = 0x0,
    655		[ C(RESULT_MISS)   ] = 0x0,
    656	},
    657 },
    658 [ C(DTLB) ] = {
    659	[ C(OP_READ) ] = {
    660		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
    661		[ C(RESULT_MISS)   ] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
    662	},
    663	[ C(OP_WRITE) ] = {
    664		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
    665		[ C(RESULT_MISS)   ] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
    666	},
    667	[ C(OP_PREFETCH) ] = {
    668		[ C(RESULT_ACCESS) ] = 0x0,
    669		[ C(RESULT_MISS)   ] = 0x0,
    670	},
    671 },
    672 [ C(ITLB) ] = {
    673	[ C(OP_READ) ] = {
    674		[ C(RESULT_ACCESS) ] = 0x2085,	/* ITLB_MISSES.STLB_HIT */
    675		[ C(RESULT_MISS)   ] = 0xe85,	/* ITLB_MISSES.WALK_COMPLETED */
    676	},
    677	[ C(OP_WRITE) ] = {
    678		[ C(RESULT_ACCESS) ] = -1,
    679		[ C(RESULT_MISS)   ] = -1,
    680	},
    681	[ C(OP_PREFETCH) ] = {
    682		[ C(RESULT_ACCESS) ] = -1,
    683		[ C(RESULT_MISS)   ] = -1,
    684	},
    685 },
    686 [ C(BPU ) ] = {
    687	[ C(OP_READ) ] = {
    688		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
    689		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
    690	},
    691	[ C(OP_WRITE) ] = {
    692		[ C(RESULT_ACCESS) ] = -1,
    693		[ C(RESULT_MISS)   ] = -1,
    694	},
    695	[ C(OP_PREFETCH) ] = {
    696		[ C(RESULT_ACCESS) ] = -1,
    697		[ C(RESULT_MISS)   ] = -1,
    698	},
    699 },
    700 [ C(NODE) ] = {
    701	[ C(OP_READ) ] = {
    702		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
    703		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
    704	},
    705	[ C(OP_WRITE) ] = {
    706		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
    707		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
    708	},
    709	[ C(OP_PREFETCH) ] = {
    710		[ C(RESULT_ACCESS) ] = 0x0,
    711		[ C(RESULT_MISS)   ] = 0x0,
    712	},
    713 },
    714};
    715
    716static __initconst const u64 skl_hw_cache_extra_regs
    717				[PERF_COUNT_HW_CACHE_MAX]
    718				[PERF_COUNT_HW_CACHE_OP_MAX]
    719				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    720{
    721 [ C(LL  ) ] = {
    722	[ C(OP_READ) ] = {
    723		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
    724				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
    725		[ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
    726				       SKL_L3_MISS|SKL_ANY_SNOOP|
    727				       SKL_SUPPLIER_NONE,
    728	},
    729	[ C(OP_WRITE) ] = {
    730		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
    731				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
    732		[ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
    733				       SKL_L3_MISS|SKL_ANY_SNOOP|
    734				       SKL_SUPPLIER_NONE,
    735	},
    736	[ C(OP_PREFETCH) ] = {
    737		[ C(RESULT_ACCESS) ] = 0x0,
    738		[ C(RESULT_MISS)   ] = 0x0,
    739	},
    740 },
    741 [ C(NODE) ] = {
    742	[ C(OP_READ) ] = {
    743		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
    744				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
    745		[ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
    746				       SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
    747	},
    748	[ C(OP_WRITE) ] = {
    749		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
    750				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
    751		[ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
    752				       SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
    753	},
    754	[ C(OP_PREFETCH) ] = {
    755		[ C(RESULT_ACCESS) ] = 0x0,
    756		[ C(RESULT_MISS)   ] = 0x0,
    757	},
    758 },
    759};
    760
    761#define SNB_DMND_DATA_RD	(1ULL << 0)
    762#define SNB_DMND_RFO		(1ULL << 1)
    763#define SNB_DMND_IFETCH		(1ULL << 2)
    764#define SNB_DMND_WB		(1ULL << 3)
    765#define SNB_PF_DATA_RD		(1ULL << 4)
    766#define SNB_PF_RFO		(1ULL << 5)
    767#define SNB_PF_IFETCH		(1ULL << 6)
    768#define SNB_LLC_DATA_RD		(1ULL << 7)
    769#define SNB_LLC_RFO		(1ULL << 8)
    770#define SNB_LLC_IFETCH		(1ULL << 9)
    771#define SNB_BUS_LOCKS		(1ULL << 10)
    772#define SNB_STRM_ST		(1ULL << 11)
    773#define SNB_OTHER		(1ULL << 15)
    774#define SNB_RESP_ANY		(1ULL << 16)
    775#define SNB_NO_SUPP		(1ULL << 17)
    776#define SNB_LLC_HITM		(1ULL << 18)
    777#define SNB_LLC_HITE		(1ULL << 19)
    778#define SNB_LLC_HITS		(1ULL << 20)
    779#define SNB_LLC_HITF		(1ULL << 21)
    780#define SNB_LOCAL		(1ULL << 22)
    781#define SNB_REMOTE		(0xffULL << 23)
    782#define SNB_SNP_NONE		(1ULL << 31)
    783#define SNB_SNP_NOT_NEEDED	(1ULL << 32)
    784#define SNB_SNP_MISS		(1ULL << 33)
    785#define SNB_NO_FWD		(1ULL << 34)
    786#define SNB_SNP_FWD		(1ULL << 35)
    787#define SNB_HITM		(1ULL << 36)
    788#define SNB_NON_DRAM		(1ULL << 37)
    789
    790#define SNB_DMND_READ		(SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
    791#define SNB_DMND_WRITE		(SNB_DMND_RFO|SNB_LLC_RFO)
    792#define SNB_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
    793
    794#define SNB_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
    795				 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
    796				 SNB_HITM)
    797
    798#define SNB_DRAM_ANY		(SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
    799#define SNB_DRAM_REMOTE		(SNB_REMOTE|SNB_SNP_ANY)
    800
    801#define SNB_L3_ACCESS		SNB_RESP_ANY
    802#define SNB_L3_MISS		(SNB_DRAM_ANY|SNB_NON_DRAM)
    803
    804static __initconst const u64 snb_hw_cache_extra_regs
    805				[PERF_COUNT_HW_CACHE_MAX]
    806				[PERF_COUNT_HW_CACHE_OP_MAX]
    807				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    808{
    809 [ C(LL  ) ] = {
    810	[ C(OP_READ) ] = {
    811		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
    812		[ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
    813	},
    814	[ C(OP_WRITE) ] = {
    815		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
    816		[ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
    817	},
    818	[ C(OP_PREFETCH) ] = {
    819		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
    820		[ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
    821	},
    822 },
    823 [ C(NODE) ] = {
    824	[ C(OP_READ) ] = {
    825		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
    826		[ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
    827	},
    828	[ C(OP_WRITE) ] = {
    829		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
    830		[ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
    831	},
    832	[ C(OP_PREFETCH) ] = {
    833		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
    834		[ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
    835	},
    836 },
    837};
    838
    839static __initconst const u64 snb_hw_cache_event_ids
    840				[PERF_COUNT_HW_CACHE_MAX]
    841				[PERF_COUNT_HW_CACHE_OP_MAX]
    842				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    843{
    844 [ C(L1D) ] = {
    845	[ C(OP_READ) ] = {
    846		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
    847		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
    848	},
    849	[ C(OP_WRITE) ] = {
    850		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
    851		[ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
    852	},
    853	[ C(OP_PREFETCH) ] = {
    854		[ C(RESULT_ACCESS) ] = 0x0,
    855		[ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
    856	},
    857 },
    858 [ C(L1I ) ] = {
    859	[ C(OP_READ) ] = {
    860		[ C(RESULT_ACCESS) ] = 0x0,
    861		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
    862	},
    863	[ C(OP_WRITE) ] = {
    864		[ C(RESULT_ACCESS) ] = -1,
    865		[ C(RESULT_MISS)   ] = -1,
    866	},
    867	[ C(OP_PREFETCH) ] = {
    868		[ C(RESULT_ACCESS) ] = 0x0,
    869		[ C(RESULT_MISS)   ] = 0x0,
    870	},
    871 },
    872 [ C(LL  ) ] = {
    873	[ C(OP_READ) ] = {
    874		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
    875		[ C(RESULT_ACCESS) ] = 0x01b7,
    876		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
    877		[ C(RESULT_MISS)   ] = 0x01b7,
    878	},
    879	[ C(OP_WRITE) ] = {
    880		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
    881		[ C(RESULT_ACCESS) ] = 0x01b7,
    882		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
    883		[ C(RESULT_MISS)   ] = 0x01b7,
    884	},
    885	[ C(OP_PREFETCH) ] = {
    886		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
    887		[ C(RESULT_ACCESS) ] = 0x01b7,
    888		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
    889		[ C(RESULT_MISS)   ] = 0x01b7,
    890	},
    891 },
    892 [ C(DTLB) ] = {
    893	[ C(OP_READ) ] = {
    894		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
    895		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
    896	},
    897	[ C(OP_WRITE) ] = {
    898		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
    899		[ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
    900	},
    901	[ C(OP_PREFETCH) ] = {
    902		[ C(RESULT_ACCESS) ] = 0x0,
    903		[ C(RESULT_MISS)   ] = 0x0,
    904	},
    905 },
    906 [ C(ITLB) ] = {
    907	[ C(OP_READ) ] = {
    908		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
    909		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
    910	},
    911	[ C(OP_WRITE) ] = {
    912		[ C(RESULT_ACCESS) ] = -1,
    913		[ C(RESULT_MISS)   ] = -1,
    914	},
    915	[ C(OP_PREFETCH) ] = {
    916		[ C(RESULT_ACCESS) ] = -1,
    917		[ C(RESULT_MISS)   ] = -1,
    918	},
    919 },
    920 [ C(BPU ) ] = {
    921	[ C(OP_READ) ] = {
    922		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
    923		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
    924	},
    925	[ C(OP_WRITE) ] = {
    926		[ C(RESULT_ACCESS) ] = -1,
    927		[ C(RESULT_MISS)   ] = -1,
    928	},
    929	[ C(OP_PREFETCH) ] = {
    930		[ C(RESULT_ACCESS) ] = -1,
    931		[ C(RESULT_MISS)   ] = -1,
    932	},
    933 },
    934 [ C(NODE) ] = {
    935	[ C(OP_READ) ] = {
    936		[ C(RESULT_ACCESS) ] = 0x01b7,
    937		[ C(RESULT_MISS)   ] = 0x01b7,
    938	},
    939	[ C(OP_WRITE) ] = {
    940		[ C(RESULT_ACCESS) ] = 0x01b7,
    941		[ C(RESULT_MISS)   ] = 0x01b7,
    942	},
    943	[ C(OP_PREFETCH) ] = {
    944		[ C(RESULT_ACCESS) ] = 0x01b7,
    945		[ C(RESULT_MISS)   ] = 0x01b7,
    946	},
    947 },
    948
    949};
    950
    951/*
    952 * Notes on the events:
    953 * - data reads do not include code reads (comparable to earlier tables)
    954 * - data counts include speculative execution (except L1 write, dtlb, bpu)
    955 * - remote node access includes remote memory, remote cache, remote mmio.
    956 * - prefetches are not included in the counts because they are not
    957 *   reliably counted.
    958 */
    959
    960#define HSW_DEMAND_DATA_RD		BIT_ULL(0)
    961#define HSW_DEMAND_RFO			BIT_ULL(1)
    962#define HSW_ANY_RESPONSE		BIT_ULL(16)
    963#define HSW_SUPPLIER_NONE		BIT_ULL(17)
    964#define HSW_L3_MISS_LOCAL_DRAM		BIT_ULL(22)
    965#define HSW_L3_MISS_REMOTE_HOP0		BIT_ULL(27)
    966#define HSW_L3_MISS_REMOTE_HOP1		BIT_ULL(28)
    967#define HSW_L3_MISS_REMOTE_HOP2P	BIT_ULL(29)
    968#define HSW_L3_MISS			(HSW_L3_MISS_LOCAL_DRAM| \
    969					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
    970					 HSW_L3_MISS_REMOTE_HOP2P)
    971#define HSW_SNOOP_NONE			BIT_ULL(31)
    972#define HSW_SNOOP_NOT_NEEDED		BIT_ULL(32)
    973#define HSW_SNOOP_MISS			BIT_ULL(33)
    974#define HSW_SNOOP_HIT_NO_FWD		BIT_ULL(34)
    975#define HSW_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
    976#define HSW_SNOOP_HITM			BIT_ULL(36)
    977#define HSW_SNOOP_NON_DRAM		BIT_ULL(37)
    978#define HSW_ANY_SNOOP			(HSW_SNOOP_NONE| \
    979					 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
    980					 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
    981					 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
    982#define HSW_SNOOP_DRAM			(HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
    983#define HSW_DEMAND_READ			HSW_DEMAND_DATA_RD
    984#define HSW_DEMAND_WRITE		HSW_DEMAND_RFO
    985#define HSW_L3_MISS_REMOTE		(HSW_L3_MISS_REMOTE_HOP0|\
    986					 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
    987#define HSW_LLC_ACCESS			HSW_ANY_RESPONSE
    988
    989#define BDW_L3_MISS_LOCAL		BIT(26)
    990#define BDW_L3_MISS			(BDW_L3_MISS_LOCAL| \
    991					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
    992					 HSW_L3_MISS_REMOTE_HOP2P)
    993
    994
    995static __initconst const u64 hsw_hw_cache_event_ids
    996				[PERF_COUNT_HW_CACHE_MAX]
    997				[PERF_COUNT_HW_CACHE_OP_MAX]
    998				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
    999{
   1000 [ C(L1D ) ] = {
   1001	[ C(OP_READ) ] = {
   1002		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
   1003		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
   1004	},
   1005	[ C(OP_WRITE) ] = {
   1006		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
   1007		[ C(RESULT_MISS)   ] = 0x0,
   1008	},
   1009	[ C(OP_PREFETCH) ] = {
   1010		[ C(RESULT_ACCESS) ] = 0x0,
   1011		[ C(RESULT_MISS)   ] = 0x0,
   1012	},
   1013 },
   1014 [ C(L1I ) ] = {
   1015	[ C(OP_READ) ] = {
   1016		[ C(RESULT_ACCESS) ] = 0x0,
   1017		[ C(RESULT_MISS)   ] = 0x280,	/* ICACHE.MISSES */
   1018	},
   1019	[ C(OP_WRITE) ] = {
   1020		[ C(RESULT_ACCESS) ] = -1,
   1021		[ C(RESULT_MISS)   ] = -1,
   1022	},
   1023	[ C(OP_PREFETCH) ] = {
   1024		[ C(RESULT_ACCESS) ] = 0x0,
   1025		[ C(RESULT_MISS)   ] = 0x0,
   1026	},
   1027 },
   1028 [ C(LL  ) ] = {
   1029	[ C(OP_READ) ] = {
   1030		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1031		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1032	},
   1033	[ C(OP_WRITE) ] = {
   1034		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1035		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1036	},
   1037	[ C(OP_PREFETCH) ] = {
   1038		[ C(RESULT_ACCESS) ] = 0x0,
   1039		[ C(RESULT_MISS)   ] = 0x0,
   1040	},
   1041 },
   1042 [ C(DTLB) ] = {
   1043	[ C(OP_READ) ] = {
   1044		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
   1045		[ C(RESULT_MISS)   ] = 0x108,	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
   1046	},
   1047	[ C(OP_WRITE) ] = {
   1048		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
   1049		[ C(RESULT_MISS)   ] = 0x149,	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
   1050	},
   1051	[ C(OP_PREFETCH) ] = {
   1052		[ C(RESULT_ACCESS) ] = 0x0,
   1053		[ C(RESULT_MISS)   ] = 0x0,
   1054	},
   1055 },
   1056 [ C(ITLB) ] = {
   1057	[ C(OP_READ) ] = {
   1058		[ C(RESULT_ACCESS) ] = 0x6085,	/* ITLB_MISSES.STLB_HIT */
   1059		[ C(RESULT_MISS)   ] = 0x185,	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
   1060	},
   1061	[ C(OP_WRITE) ] = {
   1062		[ C(RESULT_ACCESS) ] = -1,
   1063		[ C(RESULT_MISS)   ] = -1,
   1064	},
   1065	[ C(OP_PREFETCH) ] = {
   1066		[ C(RESULT_ACCESS) ] = -1,
   1067		[ C(RESULT_MISS)   ] = -1,
   1068	},
   1069 },
   1070 [ C(BPU ) ] = {
   1071	[ C(OP_READ) ] = {
   1072		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
   1073		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
   1074	},
   1075	[ C(OP_WRITE) ] = {
   1076		[ C(RESULT_ACCESS) ] = -1,
   1077		[ C(RESULT_MISS)   ] = -1,
   1078	},
   1079	[ C(OP_PREFETCH) ] = {
   1080		[ C(RESULT_ACCESS) ] = -1,
   1081		[ C(RESULT_MISS)   ] = -1,
   1082	},
   1083 },
   1084 [ C(NODE) ] = {
   1085	[ C(OP_READ) ] = {
   1086		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1087		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1088	},
   1089	[ C(OP_WRITE) ] = {
   1090		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1091		[ C(RESULT_MISS)   ] = 0x1b7,	/* OFFCORE_RESPONSE */
   1092	},
   1093	[ C(OP_PREFETCH) ] = {
   1094		[ C(RESULT_ACCESS) ] = 0x0,
   1095		[ C(RESULT_MISS)   ] = 0x0,
   1096	},
   1097 },
   1098};
   1099
   1100static __initconst const u64 hsw_hw_cache_extra_regs
   1101				[PERF_COUNT_HW_CACHE_MAX]
   1102				[PERF_COUNT_HW_CACHE_OP_MAX]
   1103				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1104{
   1105 [ C(LL  ) ] = {
   1106	[ C(OP_READ) ] = {
   1107		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
   1108				       HSW_LLC_ACCESS,
   1109		[ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
   1110				       HSW_L3_MISS|HSW_ANY_SNOOP,
   1111	},
   1112	[ C(OP_WRITE) ] = {
   1113		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
   1114				       HSW_LLC_ACCESS,
   1115		[ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
   1116				       HSW_L3_MISS|HSW_ANY_SNOOP,
   1117	},
   1118	[ C(OP_PREFETCH) ] = {
   1119		[ C(RESULT_ACCESS) ] = 0x0,
   1120		[ C(RESULT_MISS)   ] = 0x0,
   1121	},
   1122 },
   1123 [ C(NODE) ] = {
   1124	[ C(OP_READ) ] = {
   1125		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
   1126				       HSW_L3_MISS_LOCAL_DRAM|
   1127				       HSW_SNOOP_DRAM,
   1128		[ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
   1129				       HSW_L3_MISS_REMOTE|
   1130				       HSW_SNOOP_DRAM,
   1131	},
   1132	[ C(OP_WRITE) ] = {
   1133		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
   1134				       HSW_L3_MISS_LOCAL_DRAM|
   1135				       HSW_SNOOP_DRAM,
   1136		[ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
   1137				       HSW_L3_MISS_REMOTE|
   1138				       HSW_SNOOP_DRAM,
   1139	},
   1140	[ C(OP_PREFETCH) ] = {
   1141		[ C(RESULT_ACCESS) ] = 0x0,
   1142		[ C(RESULT_MISS)   ] = 0x0,
   1143	},
   1144 },
   1145};
   1146
   1147static __initconst const u64 westmere_hw_cache_event_ids
   1148				[PERF_COUNT_HW_CACHE_MAX]
   1149				[PERF_COUNT_HW_CACHE_OP_MAX]
   1150				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1151{
   1152 [ C(L1D) ] = {
   1153	[ C(OP_READ) ] = {
   1154		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
   1155		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
   1156	},
   1157	[ C(OP_WRITE) ] = {
   1158		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
   1159		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
   1160	},
   1161	[ C(OP_PREFETCH) ] = {
   1162		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
   1163		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
   1164	},
   1165 },
   1166 [ C(L1I ) ] = {
   1167	[ C(OP_READ) ] = {
   1168		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
   1169		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
   1170	},
   1171	[ C(OP_WRITE) ] = {
   1172		[ C(RESULT_ACCESS) ] = -1,
   1173		[ C(RESULT_MISS)   ] = -1,
   1174	},
   1175	[ C(OP_PREFETCH) ] = {
   1176		[ C(RESULT_ACCESS) ] = 0x0,
   1177		[ C(RESULT_MISS)   ] = 0x0,
   1178	},
   1179 },
   1180 [ C(LL  ) ] = {
   1181	[ C(OP_READ) ] = {
   1182		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
   1183		[ C(RESULT_ACCESS) ] = 0x01b7,
   1184		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
   1185		[ C(RESULT_MISS)   ] = 0x01b7,
   1186	},
   1187	/*
   1188	 * Use RFO, not WRITEBACK, because a write miss would typically occur
   1189	 * on RFO.
   1190	 */
   1191	[ C(OP_WRITE) ] = {
   1192		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
   1193		[ C(RESULT_ACCESS) ] = 0x01b7,
   1194		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
   1195		[ C(RESULT_MISS)   ] = 0x01b7,
   1196	},
   1197	[ C(OP_PREFETCH) ] = {
   1198		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
   1199		[ C(RESULT_ACCESS) ] = 0x01b7,
   1200		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
   1201		[ C(RESULT_MISS)   ] = 0x01b7,
   1202	},
   1203 },
   1204 [ C(DTLB) ] = {
   1205	[ C(OP_READ) ] = {
   1206		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
   1207		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
   1208	},
   1209	[ C(OP_WRITE) ] = {
   1210		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
   1211		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
   1212	},
   1213	[ C(OP_PREFETCH) ] = {
   1214		[ C(RESULT_ACCESS) ] = 0x0,
   1215		[ C(RESULT_MISS)   ] = 0x0,
   1216	},
   1217 },
   1218 [ C(ITLB) ] = {
   1219	[ C(OP_READ) ] = {
   1220		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
   1221		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
   1222	},
   1223	[ C(OP_WRITE) ] = {
   1224		[ C(RESULT_ACCESS) ] = -1,
   1225		[ C(RESULT_MISS)   ] = -1,
   1226	},
   1227	[ C(OP_PREFETCH) ] = {
   1228		[ C(RESULT_ACCESS) ] = -1,
   1229		[ C(RESULT_MISS)   ] = -1,
   1230	},
   1231 },
   1232 [ C(BPU ) ] = {
   1233	[ C(OP_READ) ] = {
   1234		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
   1235		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
   1236	},
   1237	[ C(OP_WRITE) ] = {
   1238		[ C(RESULT_ACCESS) ] = -1,
   1239		[ C(RESULT_MISS)   ] = -1,
   1240	},
   1241	[ C(OP_PREFETCH) ] = {
   1242		[ C(RESULT_ACCESS) ] = -1,
   1243		[ C(RESULT_MISS)   ] = -1,
   1244	},
   1245 },
   1246 [ C(NODE) ] = {
   1247	[ C(OP_READ) ] = {
   1248		[ C(RESULT_ACCESS) ] = 0x01b7,
   1249		[ C(RESULT_MISS)   ] = 0x01b7,
   1250	},
   1251	[ C(OP_WRITE) ] = {
   1252		[ C(RESULT_ACCESS) ] = 0x01b7,
   1253		[ C(RESULT_MISS)   ] = 0x01b7,
   1254	},
   1255	[ C(OP_PREFETCH) ] = {
   1256		[ C(RESULT_ACCESS) ] = 0x01b7,
   1257		[ C(RESULT_MISS)   ] = 0x01b7,
   1258	},
   1259 },
   1260};
   1261
   1262/*
   1263 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
   1264 * See IA32 SDM Vol 3B 30.6.1.3
   1265 */
   1266
   1267#define NHM_DMND_DATA_RD	(1 << 0)
   1268#define NHM_DMND_RFO		(1 << 1)
   1269#define NHM_DMND_IFETCH		(1 << 2)
   1270#define NHM_DMND_WB		(1 << 3)
   1271#define NHM_PF_DATA_RD		(1 << 4)
   1272#define NHM_PF_DATA_RFO		(1 << 5)
   1273#define NHM_PF_IFETCH		(1 << 6)
   1274#define NHM_OFFCORE_OTHER	(1 << 7)
   1275#define NHM_UNCORE_HIT		(1 << 8)
   1276#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
   1277#define NHM_OTHER_CORE_HITM	(1 << 10)
   1278        			/* reserved */
   1279#define NHM_REMOTE_CACHE_FWD	(1 << 12)
   1280#define NHM_REMOTE_DRAM		(1 << 13)
   1281#define NHM_LOCAL_DRAM		(1 << 14)
   1282#define NHM_NON_DRAM		(1 << 15)
   1283
   1284#define NHM_LOCAL		(NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
   1285#define NHM_REMOTE		(NHM_REMOTE_DRAM)
   1286
   1287#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
   1288#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
   1289#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
   1290
   1291#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
   1292#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
   1293#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
   1294
   1295static __initconst const u64 nehalem_hw_cache_extra_regs
   1296				[PERF_COUNT_HW_CACHE_MAX]
   1297				[PERF_COUNT_HW_CACHE_OP_MAX]
   1298				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1299{
   1300 [ C(LL  ) ] = {
   1301	[ C(OP_READ) ] = {
   1302		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
   1303		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
   1304	},
   1305	[ C(OP_WRITE) ] = {
   1306		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
   1307		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
   1308	},
   1309	[ C(OP_PREFETCH) ] = {
   1310		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
   1311		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
   1312	},
   1313 },
   1314 [ C(NODE) ] = {
   1315	[ C(OP_READ) ] = {
   1316		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
   1317		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
   1318	},
   1319	[ C(OP_WRITE) ] = {
   1320		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
   1321		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
   1322	},
   1323	[ C(OP_PREFETCH) ] = {
   1324		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
   1325		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
   1326	},
   1327 },
   1328};
   1329
   1330static __initconst const u64 nehalem_hw_cache_event_ids
   1331				[PERF_COUNT_HW_CACHE_MAX]
   1332				[PERF_COUNT_HW_CACHE_OP_MAX]
   1333				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1334{
   1335 [ C(L1D) ] = {
   1336	[ C(OP_READ) ] = {
   1337		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
   1338		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
   1339	},
   1340	[ C(OP_WRITE) ] = {
   1341		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
   1342		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
   1343	},
   1344	[ C(OP_PREFETCH) ] = {
   1345		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
   1346		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
   1347	},
   1348 },
   1349 [ C(L1I ) ] = {
   1350	[ C(OP_READ) ] = {
   1351		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
   1352		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
   1353	},
   1354	[ C(OP_WRITE) ] = {
   1355		[ C(RESULT_ACCESS) ] = -1,
   1356		[ C(RESULT_MISS)   ] = -1,
   1357	},
   1358	[ C(OP_PREFETCH) ] = {
   1359		[ C(RESULT_ACCESS) ] = 0x0,
   1360		[ C(RESULT_MISS)   ] = 0x0,
   1361	},
   1362 },
   1363 [ C(LL  ) ] = {
   1364	[ C(OP_READ) ] = {
   1365		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
   1366		[ C(RESULT_ACCESS) ] = 0x01b7,
   1367		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
   1368		[ C(RESULT_MISS)   ] = 0x01b7,
   1369	},
   1370	/*
   1371	 * Use RFO, not WRITEBACK, because a write miss would typically occur
   1372	 * on RFO.
   1373	 */
   1374	[ C(OP_WRITE) ] = {
   1375		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
   1376		[ C(RESULT_ACCESS) ] = 0x01b7,
   1377		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
   1378		[ C(RESULT_MISS)   ] = 0x01b7,
   1379	},
   1380	[ C(OP_PREFETCH) ] = {
   1381		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
   1382		[ C(RESULT_ACCESS) ] = 0x01b7,
   1383		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
   1384		[ C(RESULT_MISS)   ] = 0x01b7,
   1385	},
   1386 },
   1387 [ C(DTLB) ] = {
   1388	[ C(OP_READ) ] = {
   1389		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
   1390		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
   1391	},
   1392	[ C(OP_WRITE) ] = {
   1393		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
   1394		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
   1395	},
   1396	[ C(OP_PREFETCH) ] = {
   1397		[ C(RESULT_ACCESS) ] = 0x0,
   1398		[ C(RESULT_MISS)   ] = 0x0,
   1399	},
   1400 },
   1401 [ C(ITLB) ] = {
   1402	[ C(OP_READ) ] = {
   1403		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
   1404		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
   1405	},
   1406	[ C(OP_WRITE) ] = {
   1407		[ C(RESULT_ACCESS) ] = -1,
   1408		[ C(RESULT_MISS)   ] = -1,
   1409	},
   1410	[ C(OP_PREFETCH) ] = {
   1411		[ C(RESULT_ACCESS) ] = -1,
   1412		[ C(RESULT_MISS)   ] = -1,
   1413	},
   1414 },
   1415 [ C(BPU ) ] = {
   1416	[ C(OP_READ) ] = {
   1417		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
   1418		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
   1419	},
   1420	[ C(OP_WRITE) ] = {
   1421		[ C(RESULT_ACCESS) ] = -1,
   1422		[ C(RESULT_MISS)   ] = -1,
   1423	},
   1424	[ C(OP_PREFETCH) ] = {
   1425		[ C(RESULT_ACCESS) ] = -1,
   1426		[ C(RESULT_MISS)   ] = -1,
   1427	},
   1428 },
   1429 [ C(NODE) ] = {
   1430	[ C(OP_READ) ] = {
   1431		[ C(RESULT_ACCESS) ] = 0x01b7,
   1432		[ C(RESULT_MISS)   ] = 0x01b7,
   1433	},
   1434	[ C(OP_WRITE) ] = {
   1435		[ C(RESULT_ACCESS) ] = 0x01b7,
   1436		[ C(RESULT_MISS)   ] = 0x01b7,
   1437	},
   1438	[ C(OP_PREFETCH) ] = {
   1439		[ C(RESULT_ACCESS) ] = 0x01b7,
   1440		[ C(RESULT_MISS)   ] = 0x01b7,
   1441	},
   1442 },
   1443};
   1444
   1445static __initconst const u64 core2_hw_cache_event_ids
   1446				[PERF_COUNT_HW_CACHE_MAX]
   1447				[PERF_COUNT_HW_CACHE_OP_MAX]
   1448				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1449{
   1450 [ C(L1D) ] = {
   1451	[ C(OP_READ) ] = {
   1452		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
   1453		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
   1454	},
   1455	[ C(OP_WRITE) ] = {
   1456		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
   1457		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
   1458	},
   1459	[ C(OP_PREFETCH) ] = {
   1460		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
   1461		[ C(RESULT_MISS)   ] = 0,
   1462	},
   1463 },
   1464 [ C(L1I ) ] = {
   1465	[ C(OP_READ) ] = {
   1466		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
   1467		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
   1468	},
   1469	[ C(OP_WRITE) ] = {
   1470		[ C(RESULT_ACCESS) ] = -1,
   1471		[ C(RESULT_MISS)   ] = -1,
   1472	},
   1473	[ C(OP_PREFETCH) ] = {
   1474		[ C(RESULT_ACCESS) ] = 0,
   1475		[ C(RESULT_MISS)   ] = 0,
   1476	},
   1477 },
   1478 [ C(LL  ) ] = {
   1479	[ C(OP_READ) ] = {
   1480		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
   1481		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
   1482	},
   1483	[ C(OP_WRITE) ] = {
   1484		[ C(RESULT_ACCESS) ] = 0x4f2a, /* L2_ST.MESI                 */
   1485		[ C(RESULT_MISS)   ] = 0x412a, /* L2_ST.ISTATE               */
   1486	},
   1487	[ C(OP_PREFETCH) ] = {
   1488		[ C(RESULT_ACCESS) ] = 0,
   1489		[ C(RESULT_MISS)   ] = 0,
   1490	},
   1491 },
   1492 [ C(DTLB) ] = {
   1493	[ C(OP_READ) ] = {
   1494		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
   1495		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
   1496	},
   1497	[ C(OP_WRITE) ] = {
   1498		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
   1499		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
   1500	},
   1501	[ C(OP_PREFETCH) ] = {
   1502		[ C(RESULT_ACCESS) ] = 0,
   1503		[ C(RESULT_MISS)   ] = 0,
   1504	},
   1505 },
   1506 [ C(ITLB) ] = {
   1507	[ C(OP_READ) ] = {
   1508		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
   1509		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
   1510	},
   1511	[ C(OP_WRITE) ] = {
   1512		[ C(RESULT_ACCESS) ] = -1,
   1513		[ C(RESULT_MISS)   ] = -1,
   1514	},
   1515	[ C(OP_PREFETCH) ] = {
   1516		[ C(RESULT_ACCESS) ] = -1,
   1517		[ C(RESULT_MISS)   ] = -1,
   1518	},
   1519 },
   1520 [ C(BPU ) ] = {
   1521	[ C(OP_READ) ] = {
   1522		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
   1523		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
   1524	},
   1525	[ C(OP_WRITE) ] = {
   1526		[ C(RESULT_ACCESS) ] = -1,
   1527		[ C(RESULT_MISS)   ] = -1,
   1528	},
   1529	[ C(OP_PREFETCH) ] = {
   1530		[ C(RESULT_ACCESS) ] = -1,
   1531		[ C(RESULT_MISS)   ] = -1,
   1532	},
   1533 },
   1534};
   1535
   1536static __initconst const u64 atom_hw_cache_event_ids
   1537				[PERF_COUNT_HW_CACHE_MAX]
   1538				[PERF_COUNT_HW_CACHE_OP_MAX]
   1539				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1540{
   1541 [ C(L1D) ] = {
   1542	[ C(OP_READ) ] = {
   1543		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
   1544		[ C(RESULT_MISS)   ] = 0,
   1545	},
   1546	[ C(OP_WRITE) ] = {
   1547		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
   1548		[ C(RESULT_MISS)   ] = 0,
   1549	},
   1550	[ C(OP_PREFETCH) ] = {
   1551		[ C(RESULT_ACCESS) ] = 0x0,
   1552		[ C(RESULT_MISS)   ] = 0,
   1553	},
   1554 },
   1555 [ C(L1I ) ] = {
   1556	[ C(OP_READ) ] = {
   1557		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
   1558		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
   1559	},
   1560	[ C(OP_WRITE) ] = {
   1561		[ C(RESULT_ACCESS) ] = -1,
   1562		[ C(RESULT_MISS)   ] = -1,
   1563	},
   1564	[ C(OP_PREFETCH) ] = {
   1565		[ C(RESULT_ACCESS) ] = 0,
   1566		[ C(RESULT_MISS)   ] = 0,
   1567	},
   1568 },
   1569 [ C(LL  ) ] = {
   1570	[ C(OP_READ) ] = {
   1571		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
   1572		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
   1573	},
   1574	[ C(OP_WRITE) ] = {
   1575		[ C(RESULT_ACCESS) ] = 0x4f2a, /* L2_ST.MESI                 */
   1576		[ C(RESULT_MISS)   ] = 0x412a, /* L2_ST.ISTATE               */
   1577	},
   1578	[ C(OP_PREFETCH) ] = {
   1579		[ C(RESULT_ACCESS) ] = 0,
   1580		[ C(RESULT_MISS)   ] = 0,
   1581	},
   1582 },
   1583 [ C(DTLB) ] = {
   1584	[ C(OP_READ) ] = {
   1585		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
   1586		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
   1587	},
   1588	[ C(OP_WRITE) ] = {
   1589		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
   1590		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
   1591	},
   1592	[ C(OP_PREFETCH) ] = {
   1593		[ C(RESULT_ACCESS) ] = 0,
   1594		[ C(RESULT_MISS)   ] = 0,
   1595	},
   1596 },
   1597 [ C(ITLB) ] = {
   1598	[ C(OP_READ) ] = {
   1599		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
   1600		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
   1601	},
   1602	[ C(OP_WRITE) ] = {
   1603		[ C(RESULT_ACCESS) ] = -1,
   1604		[ C(RESULT_MISS)   ] = -1,
   1605	},
   1606	[ C(OP_PREFETCH) ] = {
   1607		[ C(RESULT_ACCESS) ] = -1,
   1608		[ C(RESULT_MISS)   ] = -1,
   1609	},
   1610 },
   1611 [ C(BPU ) ] = {
   1612	[ C(OP_READ) ] = {
   1613		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
   1614		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
   1615	},
   1616	[ C(OP_WRITE) ] = {
   1617		[ C(RESULT_ACCESS) ] = -1,
   1618		[ C(RESULT_MISS)   ] = -1,
   1619	},
   1620	[ C(OP_PREFETCH) ] = {
   1621		[ C(RESULT_ACCESS) ] = -1,
   1622		[ C(RESULT_MISS)   ] = -1,
   1623	},
   1624 },
   1625};
   1626
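       /*
        * The EVENT_ATTR_STR() aliases below are exported through the PMU's
        * sysfs "events" directory (e.g. /sys/bus/event_source/devices/cpu/events/),
        * so the topdown metrics can be requested by name; an illustrative
        * invocation:
        *   perf stat -e topdown-total-slots,topdown-slots-retired -a sleep 1
        */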
   1627EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
   1628EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
   1629/* no_alloc_cycles.not_delivered */
   1630EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
   1631	       "event=0xca,umask=0x50");
   1632EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
   1633/* uops_retired.all */
   1634EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
   1635	       "event=0xc2,umask=0x10");
   1636/* uops_retired.all */
   1637EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
   1638	       "event=0xc2,umask=0x10");
   1639
   1640static struct attribute *slm_events_attrs[] = {
   1641	EVENT_PTR(td_total_slots_slm),
   1642	EVENT_PTR(td_total_slots_scale_slm),
   1643	EVENT_PTR(td_fetch_bubbles_slm),
   1644	EVENT_PTR(td_fetch_bubbles_scale_slm),
   1645	EVENT_PTR(td_slots_issued_slm),
   1646	EVENT_PTR(td_slots_retired_slm),
   1647	NULL
   1648};
   1649
   1650static struct extra_reg intel_slm_extra_regs[] __read_mostly =
   1651{
   1652	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
   1653	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
   1654	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
   1655	EVENT_EXTRA_END
   1656};
   1657
   1658#define SLM_DMND_READ		SNB_DMND_DATA_RD
   1659#define SLM_DMND_WRITE		SNB_DMND_RFO
   1660#define SLM_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
   1661
   1662#define SLM_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
   1663#define SLM_LLC_ACCESS		SNB_RESP_ANY
   1664#define SLM_LLC_MISS		(SLM_SNP_ANY|SNB_NON_DRAM)
   1665
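       /*
        * The SLM_* masks above are OR'ed together to build the MSR_OFFCORE_RSP_x
        * value for a given cache op/result; e.g. the LL/OP_WRITE/RESULT_MISS
        * entry below programs SLM_DMND_WRITE|SLM_LLC_MISS, counting demand RFOs
        * whose response indicates an LLC miss.
        */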
   1666static __initconst const u64 slm_hw_cache_extra_regs
   1667				[PERF_COUNT_HW_CACHE_MAX]
   1668				[PERF_COUNT_HW_CACHE_OP_MAX]
   1669				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1670{
   1671 [ C(LL  ) ] = {
   1672	[ C(OP_READ) ] = {
   1673		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
   1674		[ C(RESULT_MISS)   ] = 0,
   1675	},
   1676	[ C(OP_WRITE) ] = {
   1677		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
   1678		[ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
   1679	},
   1680	[ C(OP_PREFETCH) ] = {
   1681		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
   1682		[ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
   1683	},
   1684 },
   1685};
   1686
   1687static __initconst const u64 slm_hw_cache_event_ids
   1688				[PERF_COUNT_HW_CACHE_MAX]
   1689				[PERF_COUNT_HW_CACHE_OP_MAX]
   1690				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
   1691{
   1692 [ C(L1D) ] = {
   1693	[ C(OP_READ) ] = {
   1694		[ C(RESULT_ACCESS) ] = 0,
   1695		[ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
   1696	},
   1697	[ C(OP_WRITE) ] = {
   1698		[ C(RESULT_ACCESS) ] = 0,
   1699		[ C(RESULT_MISS)   ] = 0,
   1700	},
   1701	[ C(OP_PREFETCH) ] = {
   1702		[ C(RESULT_ACCESS) ] = 0,
   1703		[ C(RESULT_MISS)   ] = 0,
   1704	},
   1705 },
   1706 [ C(L1I ) ] = {
   1707	[ C(OP_READ) ] = {
   1708		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
   1709		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
   1710	},
   1711	[ C(OP_WRITE) ] = {
   1712		[ C(RESULT_ACCESS) ] = -1,
   1713		[ C(RESULT_MISS)   ] = -1,
   1714	},
   1715	[ C(OP_PREFETCH) ] = {
   1716		[ C(RESULT_ACCESS) ] = 0,
   1717		[ C(RESULT_MISS)   ] = 0,
   1718	},
   1719 },
   1720 [ C(LL  ) ] = {
   1721	[ C(OP_READ) ] = {
   1722		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
   1723		[ C(RESULT_ACCESS) ] = 0x01b7,
   1724		[ C(RESULT_MISS)   ] = 0,
   1725	},
   1726	[ C(OP_WRITE) ] = {
   1727		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
   1728		[ C(RESULT_ACCESS) ] = 0x01b7,
   1729		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
   1730		[ C(RESULT_MISS)   ] = 0x01b7,
   1731	},
   1732	[ C(OP_PREFETCH) ] = {
   1733		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
   1734		[ C(RESULT_ACCESS) ] = 0x01b7,
   1735		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
   1736		[ C(RESULT_MISS)   ] = 0x01b7,
   1737	},
   1738 },
   1739 [ C(DTLB) ] = {
   1740	[ C(OP_READ) ] = {
   1741		[ C(RESULT_ACCESS) ] = 0,
   1742		[ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
   1743	},
   1744	[ C(OP_WRITE) ] = {
   1745		[ C(RESULT_ACCESS) ] = 0,
   1746		[ C(RESULT_MISS)   ] = 0,
   1747	},
   1748	[ C(OP_PREFETCH) ] = {
   1749		[ C(RESULT_ACCESS) ] = 0,
   1750		[ C(RESULT_MISS)   ] = 0,
   1751	},
   1752 },
   1753 [ C(ITLB) ] = {
   1754	[ C(OP_READ) ] = {
   1755		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
   1756		[ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
   1757	},
   1758	[ C(OP_WRITE) ] = {
   1759		[ C(RESULT_ACCESS) ] = -1,
   1760		[ C(RESULT_MISS)   ] = -1,
   1761	},
   1762	[ C(OP_PREFETCH) ] = {
   1763		[ C(RESULT_ACCESS) ] = -1,
   1764		[ C(RESULT_MISS)   ] = -1,
   1765	},
   1766 },
   1767 [ C(BPU ) ] = {
   1768	[ C(OP_READ) ] = {
   1769		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
   1770		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
   1771	},
   1772	[ C(OP_WRITE) ] = {
   1773		[ C(RESULT_ACCESS) ] = -1,
   1774		[ C(RESULT_MISS)   ] = -1,
   1775	},
   1776	[ C(OP_PREFETCH) ] = {
   1777		[ C(RESULT_ACCESS) ] = -1,
   1778		[ C(RESULT_MISS)   ] = -1,
   1779	},
   1780 },
   1781};
   1782
   1783EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
   1784EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
   1785/* UOPS_NOT_DELIVERED.ANY */
   1786EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
   1787/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
   1788EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
   1789/* UOPS_RETIRED.ANY */
   1790EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
   1791/* UOPS_ISSUED.ANY */
   1792EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
   1793
   1794static struct attribute *glm_events_attrs[] = {
   1795	EVENT_PTR(td_total_slots_glm),
   1796	EVENT_PTR(td_total_slots_scale_glm),
   1797	EVENT_PTR(td_fetch_bubbles_glm),
   1798	EVENT_PTR(td_recovery_bubbles_glm),
   1799	EVENT_PTR(td_slots_issued_glm),
   1800	EVENT_PTR(td_slots_retired_glm),
   1801	NULL
   1802};
   1803
   1804static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
   1805	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
   1806	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
   1807	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
   1808	EVENT_EXTRA_END
   1809};
   1810
   1811#define GLM_DEMAND_DATA_RD		BIT_ULL(0)
   1812#define GLM_DEMAND_RFO			BIT_ULL(1)
   1813#define GLM_ANY_RESPONSE		BIT_ULL(16)
   1814#define GLM_SNP_NONE_OR_MISS		BIT_ULL(33)
   1815#define GLM_DEMAND_READ			GLM_DEMAND_DATA_RD
   1816#define GLM_DEMAND_WRITE		GLM_DEMAND_RFO
   1817#define GLM_DEMAND_PREFETCH		(SNB_PF_DATA_RD|SNB_PF_RFO)
   1818#define GLM_LLC_ACCESS			GLM_ANY_RESPONSE
   1819#define GLM_SNP_ANY			(GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
   1820#define GLM_LLC_MISS			(GLM_SNP_ANY|SNB_NON_DRAM)
   1821
   1822static __initconst const u64 glm_hw_cache_event_ids
   1823				[PERF_COUNT_HW_CACHE_MAX]
   1824				[PERF_COUNT_HW_CACHE_OP_MAX]
   1825				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
   1826	[C(L1D)] = {
   1827		[C(OP_READ)] = {
   1828			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
   1829			[C(RESULT_MISS)]	= 0x0,
   1830		},
   1831		[C(OP_WRITE)] = {
   1832			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
   1833			[C(RESULT_MISS)]	= 0x0,
   1834		},
   1835		[C(OP_PREFETCH)] = {
   1836			[C(RESULT_ACCESS)]	= 0x0,
   1837			[C(RESULT_MISS)]	= 0x0,
   1838		},
   1839	},
   1840	[C(L1I)] = {
   1841		[C(OP_READ)] = {
   1842			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
   1843			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
   1844		},
   1845		[C(OP_WRITE)] = {
   1846			[C(RESULT_ACCESS)]	= -1,
   1847			[C(RESULT_MISS)]	= -1,
   1848		},
   1849		[C(OP_PREFETCH)] = {
   1850			[C(RESULT_ACCESS)]	= 0x0,
   1851			[C(RESULT_MISS)]	= 0x0,
   1852		},
   1853	},
   1854	[C(LL)] = {
   1855		[C(OP_READ)] = {
   1856			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1857			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1858		},
   1859		[C(OP_WRITE)] = {
   1860			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1861			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1862		},
   1863		[C(OP_PREFETCH)] = {
   1864			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1865			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1866		},
   1867	},
   1868	[C(DTLB)] = {
   1869		[C(OP_READ)] = {
   1870			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
   1871			[C(RESULT_MISS)]	= 0x0,
   1872		},
   1873		[C(OP_WRITE)] = {
   1874			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
   1875			[C(RESULT_MISS)]	= 0x0,
   1876		},
   1877		[C(OP_PREFETCH)] = {
   1878			[C(RESULT_ACCESS)]	= 0x0,
   1879			[C(RESULT_MISS)]	= 0x0,
   1880		},
   1881	},
   1882	[C(ITLB)] = {
   1883		[C(OP_READ)] = {
   1884			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
   1885			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
   1886		},
   1887		[C(OP_WRITE)] = {
   1888			[C(RESULT_ACCESS)]	= -1,
   1889			[C(RESULT_MISS)]	= -1,
   1890		},
   1891		[C(OP_PREFETCH)] = {
   1892			[C(RESULT_ACCESS)]	= -1,
   1893			[C(RESULT_MISS)]	= -1,
   1894		},
   1895	},
   1896	[C(BPU)] = {
   1897		[C(OP_READ)] = {
   1898			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
   1899			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
   1900		},
   1901		[C(OP_WRITE)] = {
   1902			[C(RESULT_ACCESS)]	= -1,
   1903			[C(RESULT_MISS)]	= -1,
   1904		},
   1905		[C(OP_PREFETCH)] = {
   1906			[C(RESULT_ACCESS)]	= -1,
   1907			[C(RESULT_MISS)]	= -1,
   1908		},
   1909	},
   1910};
   1911
   1912static __initconst const u64 glm_hw_cache_extra_regs
   1913				[PERF_COUNT_HW_CACHE_MAX]
   1914				[PERF_COUNT_HW_CACHE_OP_MAX]
   1915				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
   1916	[C(LL)] = {
   1917		[C(OP_READ)] = {
   1918			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
   1919						  GLM_LLC_ACCESS,
   1920			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
   1921						  GLM_LLC_MISS,
   1922		},
   1923		[C(OP_WRITE)] = {
   1924			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
   1925						  GLM_LLC_ACCESS,
   1926			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
   1927						  GLM_LLC_MISS,
   1928		},
   1929		[C(OP_PREFETCH)] = {
   1930			[C(RESULT_ACCESS)]	= GLM_DEMAND_PREFETCH|
   1931						  GLM_LLC_ACCESS,
   1932			[C(RESULT_MISS)]	= GLM_DEMAND_PREFETCH|
   1933						  GLM_LLC_MISS,
   1934		},
   1935	},
   1936};
   1937
   1938static __initconst const u64 glp_hw_cache_event_ids
   1939				[PERF_COUNT_HW_CACHE_MAX]
   1940				[PERF_COUNT_HW_CACHE_OP_MAX]
   1941				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
   1942	[C(L1D)] = {
   1943		[C(OP_READ)] = {
   1944			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
   1945			[C(RESULT_MISS)]	= 0x0,
   1946		},
   1947		[C(OP_WRITE)] = {
   1948			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
   1949			[C(RESULT_MISS)]	= 0x0,
   1950		},
   1951		[C(OP_PREFETCH)] = {
   1952			[C(RESULT_ACCESS)]	= 0x0,
   1953			[C(RESULT_MISS)]	= 0x0,
   1954		},
   1955	},
   1956	[C(L1I)] = {
   1957		[C(OP_READ)] = {
   1958			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
   1959			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
   1960		},
   1961		[C(OP_WRITE)] = {
   1962			[C(RESULT_ACCESS)]	= -1,
   1963			[C(RESULT_MISS)]	= -1,
   1964		},
   1965		[C(OP_PREFETCH)] = {
   1966			[C(RESULT_ACCESS)]	= 0x0,
   1967			[C(RESULT_MISS)]	= 0x0,
   1968		},
   1969	},
   1970	[C(LL)] = {
   1971		[C(OP_READ)] = {
   1972			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1973			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1974		},
   1975		[C(OP_WRITE)] = {
   1976			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1977			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
   1978		},
   1979		[C(OP_PREFETCH)] = {
   1980			[C(RESULT_ACCESS)]	= 0x0,
   1981			[C(RESULT_MISS)]	= 0x0,
   1982		},
   1983	},
   1984	[C(DTLB)] = {
   1985		[C(OP_READ)] = {
   1986			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
   1987			[C(RESULT_MISS)]	= 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
   1988		},
   1989		[C(OP_WRITE)] = {
   1990			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
   1991			[C(RESULT_MISS)]	= 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
   1992		},
   1993		[C(OP_PREFETCH)] = {
   1994			[C(RESULT_ACCESS)]	= 0x0,
   1995			[C(RESULT_MISS)]	= 0x0,
   1996		},
   1997	},
   1998	[C(ITLB)] = {
   1999		[C(OP_READ)] = {
   2000			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
   2001			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
   2002		},
   2003		[C(OP_WRITE)] = {
   2004			[C(RESULT_ACCESS)]	= -1,
   2005			[C(RESULT_MISS)]	= -1,
   2006		},
   2007		[C(OP_PREFETCH)] = {
   2008			[C(RESULT_ACCESS)]	= -1,
   2009			[C(RESULT_MISS)]	= -1,
   2010		},
   2011	},
   2012	[C(BPU)] = {
   2013		[C(OP_READ)] = {
   2014			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
   2015			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
   2016		},
   2017		[C(OP_WRITE)] = {
   2018			[C(RESULT_ACCESS)]	= -1,
   2019			[C(RESULT_MISS)]	= -1,
   2020		},
   2021		[C(OP_PREFETCH)] = {
   2022			[C(RESULT_ACCESS)]	= -1,
   2023			[C(RESULT_MISS)]	= -1,
   2024		},
   2025	},
   2026};
   2027
   2028static __initconst const u64 glp_hw_cache_extra_regs
   2029				[PERF_COUNT_HW_CACHE_MAX]
   2030				[PERF_COUNT_HW_CACHE_OP_MAX]
   2031				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
   2032	[C(LL)] = {
   2033		[C(OP_READ)] = {
   2034			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
   2035						  GLM_LLC_ACCESS,
   2036			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
   2037						  GLM_LLC_MISS,
   2038		},
   2039		[C(OP_WRITE)] = {
   2040			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
   2041						  GLM_LLC_ACCESS,
   2042			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
   2043						  GLM_LLC_MISS,
   2044		},
   2045		[C(OP_PREFETCH)] = {
   2046			[C(RESULT_ACCESS)]	= 0x0,
   2047			[C(RESULT_MISS)]	= 0x0,
   2048		},
   2049	},
   2050};
   2051
   2052#define TNT_LOCAL_DRAM			BIT_ULL(26)
   2053#define TNT_DEMAND_READ			GLM_DEMAND_DATA_RD
   2054#define TNT_DEMAND_WRITE		GLM_DEMAND_RFO
   2055#define TNT_LLC_ACCESS			GLM_ANY_RESPONSE
   2056#define TNT_SNP_ANY			(SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
   2057					 SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
   2058#define TNT_LLC_MISS			(TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
   2059
   2060static __initconst const u64 tnt_hw_cache_extra_regs
   2061				[PERF_COUNT_HW_CACHE_MAX]
   2062				[PERF_COUNT_HW_CACHE_OP_MAX]
   2063				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
   2064	[C(LL)] = {
   2065		[C(OP_READ)] = {
   2066			[C(RESULT_ACCESS)]	= TNT_DEMAND_READ|
   2067						  TNT_LLC_ACCESS,
   2068			[C(RESULT_MISS)]	= TNT_DEMAND_READ|
   2069						  TNT_LLC_MISS,
   2070		},
   2071		[C(OP_WRITE)] = {
   2072			[C(RESULT_ACCESS)]	= TNT_DEMAND_WRITE|
   2073						  TNT_LLC_ACCESS,
   2074			[C(RESULT_MISS)]	= TNT_DEMAND_WRITE|
   2075						  TNT_LLC_MISS,
   2076		},
   2077		[C(OP_PREFETCH)] = {
   2078			[C(RESULT_ACCESS)]	= 0x0,
   2079			[C(RESULT_MISS)]	= 0x0,
   2080		},
   2081	},
   2082};
   2083
   2084EVENT_ATTR_STR(topdown-fe-bound,       td_fe_bound_tnt,        "event=0x71,umask=0x0");
   2085EVENT_ATTR_STR(topdown-retiring,       td_retiring_tnt,        "event=0xc2,umask=0x0");
   2086EVENT_ATTR_STR(topdown-bad-spec,       td_bad_spec_tnt,        "event=0x73,umask=0x6");
   2087EVENT_ATTR_STR(topdown-be-bound,       td_be_bound_tnt,        "event=0x74,umask=0x0");
   2088
   2089static struct attribute *tnt_events_attrs[] = {
   2090	EVENT_PTR(td_fe_bound_tnt),
   2091	EVENT_PTR(td_retiring_tnt),
   2092	EVENT_PTR(td_bad_spec_tnt),
   2093	EVENT_PTR(td_be_bound_tnt),
   2094	NULL,
   2095};
   2096
   2097static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
   2098	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
   2099	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0),
   2100	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1),
   2101	EVENT_EXTRA_END
   2102};
   2103
   2104static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
   2105	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
   2106	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
   2107	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
   2108	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
   2109	EVENT_EXTRA_END
   2110};
   2111
   2112#define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
   2113#define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
   2114#define KNL_MCDRAM_LOCAL	BIT_ULL(21)
   2115#define KNL_MCDRAM_FAR		BIT_ULL(22)
   2116#define KNL_DDR_LOCAL		BIT_ULL(23)
   2117#define KNL_DDR_FAR		BIT_ULL(24)
   2118#define KNL_DRAM_ANY		(KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
   2119				    KNL_DDR_LOCAL | KNL_DDR_FAR)
   2120#define KNL_L2_READ		SLM_DMND_READ
   2121#define KNL_L2_WRITE		SLM_DMND_WRITE
   2122#define KNL_L2_PREFETCH		SLM_DMND_PREFETCH
   2123#define KNL_L2_ACCESS		SLM_LLC_ACCESS
   2124#define KNL_L2_MISS		(KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
   2125				   KNL_DRAM_ANY | SNB_SNP_ANY | \
   2126						  SNB_NON_DRAM)
   2127
   2128static __initconst const u64 knl_hw_cache_extra_regs
   2129				[PERF_COUNT_HW_CACHE_MAX]
   2130				[PERF_COUNT_HW_CACHE_OP_MAX]
   2131				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
   2132	[C(LL)] = {
   2133		[C(OP_READ)] = {
   2134			[C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
   2135			[C(RESULT_MISS)]   = 0,
   2136		},
   2137		[C(OP_WRITE)] = {
   2138			[C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
   2139			[C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
   2140		},
   2141		[C(OP_PREFETCH)] = {
   2142			[C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
   2143			[C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
   2144		},
   2145	},
   2146};
   2147
   2148/*
   2149 * Used from PMIs where the LBRs are already disabled.
   2150 *
   2151 * This function may be called consecutively; it is required to remain in
   2152 * the disabled state across such calls.
   2153 *
   2154 * During consecutive calls, the same disable value will be written to related
   2155 * registers, so the PMU state remains unchanged.
   2156 *
   2157 * intel_bts events don't coexist with intel PMU's BTS events because of
   2158 * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
   2159 * disabled around intel PMU's event batching etc, only inside the PMI handler.
   2160 *
   2161 * Avoid PEBS_ENABLE MSR access in PMIs.
   2162 * The GLOBAL_CTRL has already been disabled, so none of the counters count
   2163 * anymore; it doesn't matter whether PEBS is enabled or not.
   2164 * Usually, the PEBS status is not changed in PMIs, so it's unnecessary to
   2165 * access the PEBS_ENABLE MSR in disable_all()/enable_all().
   2166 * However, there are some cases which may change PEBS status, e.g. PMI
   2167 * throttle. The PEBS_ENABLE should be updated where the status changes.
   2168 */
   2169static __always_inline void __intel_pmu_disable_all(bool bts)
   2170{
   2171	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2172
   2173	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
   2174
   2175	if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
   2176		intel_pmu_disable_bts();
   2177}
   2178
   2179static __always_inline void intel_pmu_disable_all(void)
   2180{
   2181	__intel_pmu_disable_all(true);
   2182	intel_pmu_pebs_disable_all();
   2183	intel_pmu_lbr_disable_all();
   2184}
   2185
   2186static void __intel_pmu_enable_all(int added, bool pmi)
   2187{
   2188	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2189	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
   2190
   2191	intel_pmu_lbr_enable_all(pmi);
   2192	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
   2193	       intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
   2194
   2195	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
   2196		struct perf_event *event =
   2197			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
   2198
   2199		if (WARN_ON_ONCE(!event))
   2200			return;
   2201
   2202		intel_pmu_enable_bts(event->hw.config);
   2203	}
   2204}
   2205
   2206static void intel_pmu_enable_all(int added)
   2207{
   2208	intel_pmu_pebs_enable_all();
   2209	__intel_pmu_enable_all(added, false);
   2210}
   2211
   2212static noinline int
   2213__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries,
   2214				  unsigned int cnt, unsigned long flags)
   2215{
   2216	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2217
   2218	intel_pmu_lbr_read();
   2219	cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);
   2220
   2221	memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
   2222	intel_pmu_enable_all(0);
   2223	local_irq_restore(flags);
   2224	return cnt;
   2225}
   2226
   2227static int
   2228intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
   2229{
   2230	unsigned long flags;
   2231
   2232	/* must not have branches... */
   2233	local_irq_save(flags);
   2234	__intel_pmu_disable_all(false); /* we don't care about BTS */
   2235	__intel_pmu_lbr_disable();
   2236	/*            ... until here */
   2237	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
   2238}
   2239
   2240static int
   2241intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
   2242{
   2243	unsigned long flags;
   2244
   2245	/* must not have branches... */
   2246	local_irq_save(flags);
   2247	__intel_pmu_disable_all(false); /* we don't care about BTS */
   2248	__intel_pmu_arch_lbr_disable();
   2249	/*            ... until here */
   2250	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
   2251}
   2252
   2253/*
   2254 * Workaround for:
   2255 *   Intel Errata AAK100 (model 26)
   2256 *   Intel Errata AAP53  (model 30)
   2257 *   Intel Errata BD53   (model 44)
   2258 *
   2259 * The official story:
   2260 *   These chips need to be 'reset' when adding counters by programming the
   2261 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
   2262 *   in sequence on the same PMC or on different PMCs.
   2263 *
   2264 * In practice it appears some of these events do in fact count, and
   2265 * we need to program all 4 events.
   2266 */
   2267static void intel_pmu_nhm_workaround(void)
   2268{
   2269	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2270	static const unsigned long nhm_magic[4] = {
   2271		0x4300B5,
   2272		0x4300D2,
   2273		0x4300B1,
   2274		0x4300B1
   2275	};
   2276	struct perf_event *event;
   2277	int i;
   2278
   2279	/*
   2280	 * The errata requires the following steps:
   2281	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
   2282	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
   2283	 *    the corresponding PMCx;
   2284	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
   2285	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
   2286	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
   2287	 */
   2288
   2289	/*
   2290	 * The real steps we take differ slightly from the above:
   2291	 * A) To reduce MSR operations, we don't run step 1), since those
   2292	 *    registers are already cleared before this function is called;
   2293	 * B) Call x86_perf_event_update to save PMCx before configuring
   2294	 *    PERFEVTSELx with the magic numbers;
   2295	 * C) For step 5), we only clear a PERFEVTSELx when it is not
   2296	 *    currently in use.
   2297	 * D) Call x86_perf_event_set_period to restore PMCx;
   2298	 */
   2299
   2300	/* We always operate on 4 pairs of perf counters */
   2301	for (i = 0; i < 4; i++) {
   2302		event = cpuc->events[i];
   2303		if (event)
   2304			x86_perf_event_update(event);
   2305	}
   2306
   2307	for (i = 0; i < 4; i++) {
   2308		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
   2309		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
   2310	}
   2311
   2312	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
   2313	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
   2314
   2315	for (i = 0; i < 4; i++) {
   2316		event = cpuc->events[i];
   2317
   2318		if (event) {
   2319			x86_perf_event_set_period(event);
   2320			__x86_pmu_enable_event(&event->hw,
   2321					ARCH_PERFMON_EVENTSEL_ENABLE);
   2322		} else
   2323			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
   2324	}
   2325}
   2326
   2327static void intel_pmu_nhm_enable_all(int added)
   2328{
   2329	if (added)
   2330		intel_pmu_nhm_workaround();
   2331	intel_pmu_enable_all(added);
   2332}
   2333
   2334static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
   2335{
   2336	u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
   2337
   2338	if (cpuc->tfa_shadow != val) {
   2339		cpuc->tfa_shadow = val;
   2340		wrmsrl(MSR_TSX_FORCE_ABORT, val);
   2341	}
   2342}
   2343
   2344static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
   2345{
   2346	/*
   2347	 * We're going to use PMC3; make sure TFA is set before we touch it.
   2348	 */
   2349	if (cntr == 3)
   2350		intel_set_tfa(cpuc, true);
   2351}
   2352
   2353static void intel_tfa_pmu_enable_all(int added)
   2354{
   2355	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2356
   2357	/*
   2358	 * If we find PMC3 is no longer used when we enable the PMU, we can
   2359	 * clear TFA.
   2360	 */
   2361	if (!test_bit(3, cpuc->active_mask))
   2362		intel_set_tfa(cpuc, false);
   2363
   2364	intel_pmu_enable_all(added);
   2365}
   2366
   2367static inline u64 intel_pmu_get_status(void)
   2368{
   2369	u64 status;
   2370
   2371	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
   2372
   2373	return status;
   2374}
   2375
   2376static inline void intel_pmu_ack_status(u64 ack)
   2377{
   2378	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
   2379}
   2380
   2381static inline bool event_is_checkpointed(struct perf_event *event)
   2382{
   2383	return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
   2384}
   2385
   2386static inline void intel_set_masks(struct perf_event *event, int idx)
   2387{
   2388	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2389
   2390	if (event->attr.exclude_host)
   2391		__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
   2392	if (event->attr.exclude_guest)
   2393		__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
   2394	if (event_is_checkpointed(event))
   2395		__set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
   2396}
   2397
   2398static inline void intel_clear_masks(struct perf_event *event, int idx)
   2399{
   2400	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2401
   2402	__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
   2403	__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
   2404	__clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
   2405}
   2406
   2407static void intel_pmu_disable_fixed(struct perf_event *event)
   2408{
   2409	struct hw_perf_event *hwc = &event->hw;
   2410	u64 ctrl_val, mask;
   2411	int idx = hwc->idx;
   2412
   2413	if (is_topdown_idx(idx)) {
   2414		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2415
   2416		/*
   2417		 * When there are other active TopDown events,
   2418		 * don't disable the fixed counter 3.
   2419		 */
   2420		if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
   2421			return;
   2422		idx = INTEL_PMC_IDX_FIXED_SLOTS;
   2423	}
   2424
   2425	intel_clear_masks(event, idx);
   2426
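       	/*
       	 * Clear this counter's 4-bit enable field in the fixed-counter
       	 * control MSR (hwc->config_base, MSR_ARCH_PERFMON_FIXED_CTR_CTRL).
       	 */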
   2427	mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
   2428	rdmsrl(hwc->config_base, ctrl_val);
   2429	ctrl_val &= ~mask;
   2430	wrmsrl(hwc->config_base, ctrl_val);
   2431}
   2432
   2433static void intel_pmu_disable_event(struct perf_event *event)
   2434{
   2435	struct hw_perf_event *hwc = &event->hw;
   2436	int idx = hwc->idx;
   2437
   2438	switch (idx) {
   2439	case 0 ... INTEL_PMC_IDX_FIXED - 1:
   2440		intel_clear_masks(event, idx);
   2441		x86_pmu_disable_event(event);
   2442		break;
   2443	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
   2444	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
   2445		intel_pmu_disable_fixed(event);
   2446		break;
   2447	case INTEL_PMC_IDX_FIXED_BTS:
   2448		intel_pmu_disable_bts();
   2449		intel_pmu_drain_bts_buffer();
   2450		return;
   2451	case INTEL_PMC_IDX_FIXED_VLBR:
   2452		intel_clear_masks(event, idx);
   2453		break;
   2454	default:
   2455		intel_clear_masks(event, idx);
   2456		pr_warn("Failed to disable the event with invalid index %d\n",
   2457			idx);
   2458		return;
   2459	}
   2460
   2461	/*
   2462	 * Needs to be called after x86_pmu_disable_event,
   2463	 * so we don't trigger the event without PEBS bit set.
   2464	 */
   2465	if (unlikely(event->attr.precise_ip))
   2466		intel_pmu_pebs_disable(event);
   2467}
   2468
   2469static void intel_pmu_assign_event(struct perf_event *event, int idx)
   2470{
   2471	if (is_pebs_pt(event))
   2472		perf_report_aux_output_id(event, idx);
   2473}
   2474
   2475static void intel_pmu_del_event(struct perf_event *event)
   2476{
   2477	if (needs_branch_stack(event))
   2478		intel_pmu_lbr_del(event);
   2479	if (event->attr.precise_ip)
   2480		intel_pmu_pebs_del(event);
   2481}
   2482
   2483static int icl_set_topdown_event_period(struct perf_event *event)
   2484{
   2485	struct hw_perf_event *hwc = &event->hw;
   2486	s64 left = local64_read(&hwc->period_left);
   2487
   2488	/*
   2489	 * The values in PERF_METRICS MSR are derived from fixed counter 3.
   2490	 * Software should start both registers, PERF_METRICS and fixed
   2491	 * counter 3, from zero.
   2492	 * Clear PERF_METRICS and fixed counter 3 during initialization.
   2493	 * After that, both MSRs will be cleared for each read, so there is
   2494	 * no need to clear them again.
   2495	 */
   2496	if (left == x86_pmu.max_period) {
   2497		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
   2498		wrmsrl(MSR_PERF_METRICS, 0);
   2499		hwc->saved_slots = 0;
   2500		hwc->saved_metric = 0;
   2501	}
   2502
   2503	if ((hwc->saved_slots) && is_slots_event(event)) {
   2504		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
   2505		wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
   2506	}
   2507
   2508	perf_event_update_userpage(event);
   2509
   2510	return 0;
   2511}
   2512
   2513static int adl_set_topdown_event_period(struct perf_event *event)
   2514{
   2515	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
   2516
   2517	if (pmu->cpu_type != hybrid_big)
   2518		return 0;
   2519
   2520	return icl_set_topdown_event_period(event);
   2521}
   2522
   2523static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
   2524{
   2525	u32 val;
   2526
   2527	/*
   2528	 * The metric is reported as an 8bit integer fraction
   2529	 * summing up to 0xff.
   2530	 * slots-in-metric = (Metric / 0xff) * slots
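       	 * e.g. with slots = 1000 and a metric byte of 0x80, this yields
       	 * 1000 * 0x80 / 0xff = 501 slots (integer division).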
   2531	 */
   2532	val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff;
   2533	return  mul_u64_u32_div(slots, val, 0xff);
   2534}
   2535
   2536static u64 icl_get_topdown_value(struct perf_event *event,
   2537				       u64 slots, u64 metrics)
   2538{
   2539	int idx = event->hw.idx;
   2540	u64 delta;
   2541
   2542	if (is_metric_idx(idx))
   2543		delta = icl_get_metrics_event_value(metrics, slots, idx);
   2544	else
   2545		delta = slots;
   2546
   2547	return delta;
   2548}
   2549
   2550static void __icl_update_topdown_event(struct perf_event *event,
   2551				       u64 slots, u64 metrics,
   2552				       u64 last_slots, u64 last_metrics)
   2553{
   2554	u64 delta, last = 0;
   2555
   2556	delta = icl_get_topdown_value(event, slots, metrics);
   2557	if (last_slots)
   2558		last = icl_get_topdown_value(event, last_slots, last_metrics);
   2559
   2560	/*
   2561	 * The 8-bit integer fraction of the metric may not be accurate,
   2562	 * especially when the change is very small.
   2563	 * For example, if only a few bad_spec events happen, the fraction
   2564	 * may be reduced from 1 to 0. If so, the bad_spec event value
   2565	 * will be 0, which is definitely less than the last value.
   2566	 * Avoid updating event->count in this case.
   2567	 */
   2568	if (delta > last) {
   2569		delta -= last;
   2570		local64_add(delta, &event->count);
   2571	}
   2572}
   2573
   2574static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
   2575				      u64 metrics, int metric_end)
   2576{
   2577	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2578	struct perf_event *other;
   2579	int idx;
   2580
   2581	event->hw.saved_slots = slots;
   2582	event->hw.saved_metric = metrics;
   2583
   2584	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
   2585		if (!is_topdown_idx(idx))
   2586			continue;
   2587		other = cpuc->events[idx];
   2588		other->hw.saved_slots = slots;
   2589		other->hw.saved_metric = metrics;
   2590	}
   2591}
   2592
   2593/*
   2594 * Update all active Topdown events.
   2595 *
   2596 * The PERF_METRICS and fixed counter 3 are read separately. The values may be
   2597 * modified by an NMI, so the PMU has to be disabled before calling this function.
   2598 */
   2599
   2600static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
   2601{
   2602	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2603	struct perf_event *other;
   2604	u64 slots, metrics;
   2605	bool reset = true;
   2606	int idx;
   2607
   2608	/* read Fixed counter 3 */
   2609	rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
   2610	if (!slots)
   2611		return 0;
   2612
   2613	/* read PERF_METRICS */
   2614	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
   2615
   2616	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
   2617		if (!is_topdown_idx(idx))
   2618			continue;
   2619		other = cpuc->events[idx];
   2620		__icl_update_topdown_event(other, slots, metrics,
   2621					   event ? event->hw.saved_slots : 0,
   2622					   event ? event->hw.saved_metric : 0);
   2623	}
   2624
   2625	/*
   2626	 * Check and update this event, which may have been cleared
   2627	 * in active_mask, e.g. by x86_pmu_stop().
   2628	 */
   2629	if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
   2630		__icl_update_topdown_event(event, slots, metrics,
   2631					   event->hw.saved_slots,
   2632					   event->hw.saved_metric);
   2633
   2634		/*
   2635		 * In x86_pmu_stop(), the event is cleared in active_mask first,
   2636		 * then the delta is drained, which indicates a context switch
   2637		 * for counting.
   2638		 * Save the metric and slots for the context switch.
   2639		 * There is no need to reset PERF_METRICS and fixed counter 3,
   2640		 * because the values will be restored on the next schedule-in.
   2641		 */
   2642		update_saved_topdown_regs(event, slots, metrics, metric_end);
   2643		reset = false;
   2644	}
   2645
   2646	if (reset) {
   2647		/* The fixed counter 3 has to be written before the PERF_METRICS. */
   2648		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
   2649		wrmsrl(MSR_PERF_METRICS, 0);
   2650		if (event)
   2651			update_saved_topdown_regs(event, 0, 0, metric_end);
   2652	}
   2653
   2654	return slots;
   2655}
   2656
   2657static u64 icl_update_topdown_event(struct perf_event *event)
   2658{
   2659	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
   2660						 x86_pmu.num_topdown_events - 1);
   2661}
   2662
   2663static u64 adl_update_topdown_event(struct perf_event *event)
   2664{
   2665	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
   2666
   2667	if (pmu->cpu_type != hybrid_big)
   2668		return 0;
   2669
   2670	return icl_update_topdown_event(event);
   2671}
   2672
   2673
   2674static void intel_pmu_read_topdown_event(struct perf_event *event)
   2675{
   2676	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2677
   2678	/* Only need to call update_topdown_event() once for group read. */
   2679	if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
   2680	    !is_slots_event(event))
   2681		return;
   2682
   2683	perf_pmu_disable(event->pmu);
   2684	x86_pmu.update_topdown_event(event);
   2685	perf_pmu_enable(event->pmu);
   2686}
   2687
   2688static void intel_pmu_read_event(struct perf_event *event)
   2689{
   2690	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
   2691		intel_pmu_auto_reload_read(event);
   2692	else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
   2693		intel_pmu_read_topdown_event(event);
   2694	else
   2695		x86_perf_event_update(event);
   2696}
   2697
   2698static void intel_pmu_enable_fixed(struct perf_event *event)
   2699{
   2700	struct hw_perf_event *hwc = &event->hw;
   2701	u64 ctrl_val, mask, bits = 0;
   2702	int idx = hwc->idx;
   2703
   2704	if (is_topdown_idx(idx)) {
   2705		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2706		/*
   2707		 * When there are other active TopDown events,
   2708		 * don't enable the fixed counter 3 again.
   2709		 */
   2710		if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
   2711			return;
   2712
   2713		idx = INTEL_PMC_IDX_FIXED_SLOTS;
   2714	}
   2715
   2716	intel_set_masks(event, idx);
   2717
   2718	/*
   2719	 * Enable IRQ generation (0x8), if not PEBS,
   2720	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
   2721	 * if requested:
   2722	 */
   2723	if (!event->attr.precise_ip)
   2724		bits |= 0x8;
   2725	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
   2726		bits |= 0x2;
   2727	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
   2728		bits |= 0x1;
   2729
   2730	/*
   2731	 * ANY bit is supported in v3 and up
   2732	 */
   2733	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
   2734		bits |= 0x4;
   2735
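       	/*
       	 * Each fixed counter owns a 4-bit control field in
       	 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL: bit 0 enables ring-0 counting,
       	 * bit 1 ring-3 counting, bit 2 AnyThread and bit 3 PMI generation.
       	 * Shift the bits assembled above into this counter's field.
       	 */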
   2736	idx -= INTEL_PMC_IDX_FIXED;
   2737	bits <<= (idx * 4);
   2738	mask = 0xfULL << (idx * 4);
   2739
   2740	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
   2741		bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
   2742		mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
   2743	}
   2744
   2745	rdmsrl(hwc->config_base, ctrl_val);
   2746	ctrl_val &= ~mask;
   2747	ctrl_val |= bits;
   2748	wrmsrl(hwc->config_base, ctrl_val);
   2749}
   2750
   2751static void intel_pmu_enable_event(struct perf_event *event)
   2752{
   2753	struct hw_perf_event *hwc = &event->hw;
   2754	int idx = hwc->idx;
   2755
   2756	if (unlikely(event->attr.precise_ip))
   2757		intel_pmu_pebs_enable(event);
   2758
   2759	switch (idx) {
   2760	case 0 ... INTEL_PMC_IDX_FIXED - 1:
   2761		intel_set_masks(event, idx);
   2762		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
   2763		break;
   2764	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
   2765	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
   2766		intel_pmu_enable_fixed(event);
   2767		break;
   2768	case INTEL_PMC_IDX_FIXED_BTS:
   2769		if (!__this_cpu_read(cpu_hw_events.enabled))
   2770			return;
   2771		intel_pmu_enable_bts(hwc->config);
   2772		break;
   2773	case INTEL_PMC_IDX_FIXED_VLBR:
   2774		intel_set_masks(event, idx);
   2775		break;
   2776	default:
   2777		pr_warn("Failed to enable the event with invalid index %d\n",
   2778			idx);
   2779	}
   2780}
   2781
   2782static void intel_pmu_add_event(struct perf_event *event)
   2783{
   2784	if (event->attr.precise_ip)
   2785		intel_pmu_pebs_add(event);
   2786	if (needs_branch_stack(event))
   2787		intel_pmu_lbr_add(event);
   2788}
   2789
   2790/*
   2791 * Save and restart an expired event. Called by NMI contexts,
   2792 * so it has to be careful about preempting normal event ops:
   2793 */
   2794int intel_pmu_save_and_restart(struct perf_event *event)
   2795{
   2796	x86_perf_event_update(event);
   2797	/*
   2798	 * For a checkpointed counter always reset back to 0.  This
   2799	 * avoids a situation where the counter overflows, aborts the
   2800	 * transaction and is then set back to shortly before the
   2801	 * overflow, and overflows and aborts again.
   2802	 */
   2803	if (unlikely(event_is_checkpointed(event))) {
   2804		/* No race with NMIs because the counter should not be armed */
   2805		wrmsrl(event->hw.event_base, 0);
   2806		local64_set(&event->hw.prev_count, 0);
   2807	}
   2808	return x86_perf_event_set_period(event);
   2809}
   2810
   2811static void intel_pmu_reset(void)
   2812{
   2813	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
   2814	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2815	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
   2816	int num_counters = hybrid(cpuc->pmu, num_counters);
   2817	unsigned long flags;
   2818	int idx;
   2819
   2820	if (!num_counters)
   2821		return;
   2822
   2823	local_irq_save(flags);
   2824
   2825	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
   2826
   2827	for (idx = 0; idx < num_counters; idx++) {
   2828		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
   2829		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
   2830	}
   2831	for (idx = 0; idx < num_counters_fixed; idx++) {
   2832		if (fixed_counter_disabled(idx, cpuc->pmu))
   2833			continue;
   2834		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
   2835	}
   2836
   2837	if (ds)
   2838		ds->bts_index = ds->bts_buffer_base;
   2839
   2840	/* Ack all overflows and disable fixed counters */
   2841	if (x86_pmu.version >= 2) {
   2842		intel_pmu_ack_status(intel_pmu_get_status());
   2843		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
   2844	}
   2845
   2846	/* Reset LBRs and LBR freezing */
   2847	if (x86_pmu.lbr_nr) {
   2848		update_debugctlmsr(get_debugctlmsr() &
   2849			~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
   2850	}
   2851
   2852	local_irq_restore(flags);
   2853}
   2854
   2855static int handle_pmi_common(struct pt_regs *regs, u64 status)
   2856{
   2857	struct perf_sample_data data;
   2858	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2859	int bit;
   2860	int handled = 0;
   2861	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
   2862
   2863	inc_irq_stat(apic_perf_irqs);
   2864
   2865	/*
   2866	 * Ignore a range of extra bits in status that do not indicate
   2867	 * overflow by themselves.
   2868	 */
   2869	status &= ~(GLOBAL_STATUS_COND_CHG |
   2870		    GLOBAL_STATUS_ASIF |
   2871		    GLOBAL_STATUS_LBRS_FROZEN);
   2872	if (!status)
   2873		return 0;
   2874	/*
   2875	 * In case multiple PEBS events are sampled at the same time,
   2876	 * it is possible to have GLOBAL_STATUS bit 62 set indicating
   2877	 * PEBS buffer overflow and also to see at most 3 PEBS counters
   2878	 * having their bits set in the status register. This is a sign
   2879	 * that there was at least one PEBS record pending at the time
   2880	 * of the PMU interrupt. PEBS counters must only be processed
   2881	 * via the drain_pebs() calls and not via the regular sample
   2882	 * processing loop that follows later in this function, otherwise
   2883	 * phony regular samples may be generated in the sampling buffer
   2884	 * not marked with the EXACT tag. Another possibility is to have
   2885	 * one PEBS event and at least one non-PEBS event which overflows
   2886	 * while PEBS is armed. In this case, bit 62 of GLOBAL_STATUS will
   2887	 * not be set, yet the overflow status bit for the PEBS counter will
   2888	 * still be set on Skylake.
   2889	 *
   2890	 * To avoid this problem, we systematically ignore the PEBS-enabled
   2891	 * counters from the GLOBAL_STATUS mask and we always process PEBS
   2892	 * events via drain_pebs().
   2893	 */
   2894	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
   2895		status &= ~cpuc->pebs_enabled;
   2896	else
   2897		status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
   2898
   2899	/*
   2900	 * PEBS overflow sets bit 62 in the global status register
   2901	 */
   2902	if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) {
   2903		u64 pebs_enabled = cpuc->pebs_enabled;
   2904
   2905		handled++;
   2906		x86_pmu.drain_pebs(regs, &data);
   2907		status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
   2908
   2909		/*
   2910		 * A PMI throttle may be triggered, which stops the PEBS event.
   2911		 * Although cpuc->pebs_enabled is updated accordingly, the
   2912		 * MSR_IA32_PEBS_ENABLE is not, because cpuc->enabled has been
   2913		 * forced to 0 in the PMI.
   2914		 * Update the MSR if pebs_enabled has changed.
   2915		 */
   2916		if (pebs_enabled != cpuc->pebs_enabled)
   2917			wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
   2918	}
   2919
   2920	/*
   2921	 * Intel PT
   2922	 */
   2923	if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
   2924		handled++;
   2925		if (!perf_guest_handle_intel_pt_intr())
   2926			intel_pt_interrupt();
   2927	}
   2928
   2929	/*
   2930	 * Intel Perf metrics
   2931	 */
   2932	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
   2933		handled++;
   2934		if (x86_pmu.update_topdown_event)
   2935			x86_pmu.update_topdown_event(NULL);
   2936	}
   2937
   2938	/*
   2939	 * Checkpointed counters can lead to 'spurious' PMIs because the
   2940	 * rollback caused by the PMI will have cleared the overflow status
   2941	 * bit. Therefore always force probe these counters.
   2942	 */
   2943	status |= cpuc->intel_cp_status;
   2944
   2945	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
   2946		struct perf_event *event = cpuc->events[bit];
   2947
   2948		handled++;
   2949
   2950		if (!test_bit(bit, cpuc->active_mask))
   2951			continue;
   2952
   2953		if (!intel_pmu_save_and_restart(event))
   2954			continue;
   2955
   2956		perf_sample_data_init(&data, 0, event->hw.last_period);
   2957
   2958		if (has_branch_stack(event))
   2959			data.br_stack = &cpuc->lbr_stack;
   2960
   2961		if (perf_event_overflow(event, &data, regs))
   2962			x86_pmu_stop(event, 0);
   2963	}
   2964
   2965	return handled;
   2966}
   2967
   2968/*
   2969 * This handler is triggered by the local APIC, so the APIC IRQ handling
   2970 * rules apply:
   2971 */
   2972static int intel_pmu_handle_irq(struct pt_regs *regs)
   2973{
   2974	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   2975	bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
   2976	bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
   2977	int loops;
   2978	u64 status;
   2979	int handled;
   2980	int pmu_enabled;
   2981
   2982	/*
   2983	 * Save the PMU state.
   2984	 * It needs to be restored when leaving the handler.
   2985	 */
   2986	pmu_enabled = cpuc->enabled;
   2987	/*
   2988	 * In general, the early ACK is only applied on old platforms.
   2989	 * For big cores, starting from Haswell, the late ACK should be
   2990	 * applied.
   2991	 * For small cores after Tremont, we have to do the ACK right
   2992	 * before re-enabling counters, which is in the middle of this
   2993	 * NMI handler.
   2994	 */
   2995	if (!late_ack && !mid_ack)
   2996		apic_write(APIC_LVTPC, APIC_DM_NMI);
   2997	intel_bts_disable_local();
   2998	cpuc->enabled = 0;
   2999	__intel_pmu_disable_all(true);
   3000	handled = intel_pmu_drain_bts_buffer();
   3001	handled += intel_bts_interrupt();
   3002	status = intel_pmu_get_status();
   3003	if (!status)
   3004		goto done;
   3005
   3006	loops = 0;
   3007again:
   3008	intel_pmu_lbr_read();
   3009	intel_pmu_ack_status(status);
   3010	if (++loops > 100) {
   3011		static bool warned;
   3012
   3013		if (!warned) {
   3014			WARN(1, "perfevents: irq loop stuck!\n");
   3015			perf_event_print_debug();
   3016			warned = true;
   3017		}
   3018		intel_pmu_reset();
   3019		goto done;
   3020	}
   3021
   3022	handled += handle_pmi_common(regs, status);
   3023
   3024	/*
   3025	 * Repeat if there is more work to be done:
   3026	 */
   3027	status = intel_pmu_get_status();
   3028	if (status)
   3029		goto again;
   3030
   3031done:
   3032	if (mid_ack)
   3033		apic_write(APIC_LVTPC, APIC_DM_NMI);
   3034	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
   3035	cpuc->enabled = pmu_enabled;
   3036	if (pmu_enabled)
   3037		__intel_pmu_enable_all(0, true);
   3038	intel_bts_enable_local();
   3039
   3040	/*
   3041	 * Only unmask the NMI after the overflow counters
   3042	 * have been reset. This avoids spurious NMIs on
   3043	 * Haswell CPUs.
   3044	 */
   3045	if (late_ack)
   3046		apic_write(APIC_LVTPC, APIC_DM_NMI);
   3047	return handled;
   3048}
   3049
   3050static struct event_constraint *
   3051intel_bts_constraints(struct perf_event *event)
   3052{
   3053	if (unlikely(intel_pmu_has_bts(event)))
   3054		return &bts_constraint;
   3055
   3056	return NULL;
   3057}
   3058
   3059/*
   3060 * Note: matches a fake event, like Fixed2.
   3061 */
   3062static struct event_constraint *
   3063intel_vlbr_constraints(struct perf_event *event)
   3064{
   3065	struct event_constraint *c = &vlbr_constraint;
   3066
   3067	if (unlikely(constraint_match(c, event->hw.config))) {
   3068		event->hw.flags |= c->flags;
   3069		return c;
   3070	}
   3071
   3072	return NULL;
   3073}
   3074
   3075static int intel_alt_er(struct cpu_hw_events *cpuc,
   3076			int idx, u64 config)
   3077{
   3078	struct extra_reg *extra_regs = hybrid(cpuc->pmu, extra_regs);
   3079	int alt_idx = idx;
   3080
   3081	if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
   3082		return idx;
   3083
   3084	if (idx == EXTRA_REG_RSP_0)
   3085		alt_idx = EXTRA_REG_RSP_1;
   3086
   3087	if (idx == EXTRA_REG_RSP_1)
   3088		alt_idx = EXTRA_REG_RSP_0;
   3089
   3090	if (config & ~extra_regs[alt_idx].valid_mask)
   3091		return idx;
   3092
   3093	return alt_idx;
   3094}
   3095
   3096static void intel_fixup_er(struct perf_event *event, int idx)
   3097{
   3098	struct extra_reg *extra_regs = hybrid(event->pmu, extra_regs);
   3099	event->hw.extra_reg.idx = idx;
   3100
   3101	if (idx == EXTRA_REG_RSP_0) {
   3102		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
   3103		event->hw.config |= extra_regs[EXTRA_REG_RSP_0].event;
   3104		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
   3105	} else if (idx == EXTRA_REG_RSP_1) {
   3106		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
   3107		event->hw.config |= extra_regs[EXTRA_REG_RSP_1].event;
   3108		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
   3109	}
   3110}
   3111
   3112/*
   3113 * manage allocation of shared extra msr for certain events
   3114 *
   3115 * sharing can be:
   3116 * per-cpu: to be shared between the various events on a single PMU
   3117 * per-core: per-cpu + shared by HT threads
   3118 */
   3119static struct event_constraint *
   3120__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
   3121				   struct perf_event *event,
   3122				   struct hw_perf_event_extra *reg)
   3123{
   3124	struct event_constraint *c = &emptyconstraint;
   3125	struct er_account *era;
   3126	unsigned long flags;
   3127	int idx = reg->idx;
   3128
   3129	/*
    3130	 * reg->alloc can be set due to existing state, so for a fake cpuc we
   3131	 * need to ignore this, otherwise we might fail to allocate proper fake
   3132	 * state for this extra reg constraint. Also see the comment below.
   3133	 */
   3134	if (reg->alloc && !cpuc->is_fake)
   3135		return NULL; /* call x86_get_event_constraint() */
   3136
   3137again:
   3138	era = &cpuc->shared_regs->regs[idx];
   3139	/*
   3140	 * we use spin_lock_irqsave() to avoid lockdep issues when
   3141	 * passing a fake cpuc
   3142	 */
   3143	raw_spin_lock_irqsave(&era->lock, flags);
   3144
   3145	if (!atomic_read(&era->ref) || era->config == reg->config) {
   3146
   3147		/*
    3148		 * If it's a fake cpuc -- as per validate_{group,event}() we
   3149		 * shouldn't touch event state and we can avoid doing so
   3150		 * since both will only call get_event_constraints() once
   3151		 * on each event, this avoids the need for reg->alloc.
   3152		 *
   3153		 * Not doing the ER fixup will only result in era->reg being
   3154		 * wrong, but since we won't actually try and program hardware
   3155		 * this isn't a problem either.
   3156		 */
   3157		if (!cpuc->is_fake) {
   3158			if (idx != reg->idx)
   3159				intel_fixup_er(event, idx);
   3160
   3161			/*
   3162			 * x86_schedule_events() can call get_event_constraints()
   3163			 * multiple times on events in the case of incremental
    3164			 * scheduling. reg->alloc ensures we only do the ER
   3165			 * allocation once.
   3166			 */
   3167			reg->alloc = 1;
   3168		}
   3169
   3170		/* lock in msr value */
   3171		era->config = reg->config;
   3172		era->reg = reg->reg;
   3173
   3174		/* one more user */
   3175		atomic_inc(&era->ref);
   3176
   3177		/*
   3178		 * need to call x86_get_event_constraint()
   3179		 * to check if associated event has constraints
   3180		 */
   3181		c = NULL;
   3182	} else {
   3183		idx = intel_alt_er(cpuc, idx, reg->config);
   3184		if (idx != reg->idx) {
   3185			raw_spin_unlock_irqrestore(&era->lock, flags);
   3186			goto again;
   3187		}
   3188	}
   3189	raw_spin_unlock_irqrestore(&era->lock, flags);
   3190
   3191	return c;
   3192}
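
/*
 * Editor's note -- illustrative sketch only, not part of the driver.
 * On PMUs with PMU_FL_HAS_RSP_1, two events that both request an
 * OFFCORE_RSP filter but with different config values play out as:
 *
 *	event A (reg->idx = EXTRA_REG_RSP_0, config = 0x1, hypothetical):
 *		era[RSP_0].ref == 0 -> takes RSP_0, era->config = 0x1
 *	event B (reg->idx = EXTRA_REG_RSP_0, config = 0x2, hypothetical):
 *		era[RSP_0].config != 0x2 -> intel_alt_er() retries RSP_1,
 *		intel_fixup_er() rewrites the event code and MSR to RSP_1
 *
 * A third event with yet another config value finds both shared MSRs
 * pinned, gets &emptyconstraint and fails to schedule.
 */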
   3193
   3194static void
   3195__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
   3196				   struct hw_perf_event_extra *reg)
   3197{
   3198	struct er_account *era;
   3199
   3200	/*
   3201	 * Only put constraint if extra reg was actually allocated. Also takes
    3202	 * care of events which do not use an extra shared reg.
   3203	 *
   3204	 * Also, if this is a fake cpuc we shouldn't touch any event state
   3205	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
   3206	 * either since it'll be thrown out.
   3207	 */
   3208	if (!reg->alloc || cpuc->is_fake)
   3209		return;
   3210
   3211	era = &cpuc->shared_regs->regs[reg->idx];
   3212
   3213	/* one fewer user */
   3214	atomic_dec(&era->ref);
   3215
   3216	/* allocate again next time */
   3217	reg->alloc = 0;
   3218}
   3219
   3220static struct event_constraint *
   3221intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
   3222			      struct perf_event *event)
   3223{
   3224	struct event_constraint *c = NULL, *d;
   3225	struct hw_perf_event_extra *xreg, *breg;
   3226
   3227	xreg = &event->hw.extra_reg;
   3228	if (xreg->idx != EXTRA_REG_NONE) {
   3229		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
   3230		if (c == &emptyconstraint)
   3231			return c;
   3232	}
   3233	breg = &event->hw.branch_reg;
   3234	if (breg->idx != EXTRA_REG_NONE) {
   3235		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
   3236		if (d == &emptyconstraint) {
   3237			__intel_shared_reg_put_constraints(cpuc, xreg);
   3238			c = d;
   3239		}
   3240	}
   3241	return c;
   3242}
   3243
   3244struct event_constraint *
   3245x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   3246			  struct perf_event *event)
   3247{
   3248	struct event_constraint *event_constraints = hybrid(cpuc->pmu, event_constraints);
   3249	struct event_constraint *c;
   3250
   3251	if (event_constraints) {
   3252		for_each_event_constraint(c, event_constraints) {
   3253			if (constraint_match(c, event->hw.config)) {
   3254				event->hw.flags |= c->flags;
   3255				return c;
   3256			}
   3257		}
   3258	}
   3259
   3260	return &hybrid_var(cpuc->pmu, unconstrained);
   3261}
   3262
   3263static struct event_constraint *
   3264__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   3265			    struct perf_event *event)
   3266{
   3267	struct event_constraint *c;
   3268
   3269	c = intel_vlbr_constraints(event);
   3270	if (c)
   3271		return c;
   3272
   3273	c = intel_bts_constraints(event);
   3274	if (c)
   3275		return c;
   3276
   3277	c = intel_shared_regs_constraints(cpuc, event);
   3278	if (c)
   3279		return c;
   3280
   3281	c = intel_pebs_constraints(event);
   3282	if (c)
   3283		return c;
   3284
   3285	return x86_get_event_constraints(cpuc, idx, event);
   3286}
   3287
   3288static void
   3289intel_start_scheduling(struct cpu_hw_events *cpuc)
   3290{
   3291	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
   3292	struct intel_excl_states *xl;
   3293	int tid = cpuc->excl_thread_id;
   3294
   3295	/*
   3296	 * nothing needed if in group validation mode
   3297	 */
   3298	if (cpuc->is_fake || !is_ht_workaround_enabled())
   3299		return;
   3300
   3301	/*
   3302	 * no exclusion needed
   3303	 */
   3304	if (WARN_ON_ONCE(!excl_cntrs))
   3305		return;
   3306
   3307	xl = &excl_cntrs->states[tid];
   3308
   3309	xl->sched_started = true;
   3310	/*
   3311	 * lock shared state until we are done scheduling
   3312	 * in stop_event_scheduling()
   3313	 * makes scheduling appear as a transaction
   3314	 */
   3315	raw_spin_lock(&excl_cntrs->lock);
   3316}
   3317
   3318static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
   3319{
   3320	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
   3321	struct event_constraint *c = cpuc->event_constraint[idx];
   3322	struct intel_excl_states *xl;
   3323	int tid = cpuc->excl_thread_id;
   3324
   3325	if (cpuc->is_fake || !is_ht_workaround_enabled())
   3326		return;
   3327
   3328	if (WARN_ON_ONCE(!excl_cntrs))
   3329		return;
   3330
   3331	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
   3332		return;
   3333
   3334	xl = &excl_cntrs->states[tid];
   3335
   3336	lockdep_assert_held(&excl_cntrs->lock);
   3337
   3338	if (c->flags & PERF_X86_EVENT_EXCL)
   3339		xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
   3340	else
   3341		xl->state[cntr] = INTEL_EXCL_SHARED;
   3342}
   3343
   3344static void
   3345intel_stop_scheduling(struct cpu_hw_events *cpuc)
   3346{
   3347	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
   3348	struct intel_excl_states *xl;
   3349	int tid = cpuc->excl_thread_id;
   3350
   3351	/*
   3352	 * nothing needed if in group validation mode
   3353	 */
   3354	if (cpuc->is_fake || !is_ht_workaround_enabled())
   3355		return;
   3356	/*
   3357	 * no exclusion needed
   3358	 */
   3359	if (WARN_ON_ONCE(!excl_cntrs))
   3360		return;
   3361
   3362	xl = &excl_cntrs->states[tid];
   3363
   3364	xl->sched_started = false;
   3365	/*
   3366	 * release shared state lock (acquired in intel_start_scheduling())
   3367	 */
   3368	raw_spin_unlock(&excl_cntrs->lock);
   3369}
   3370
   3371static struct event_constraint *
   3372dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
   3373{
   3374	WARN_ON_ONCE(!cpuc->constraint_list);
   3375
   3376	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
   3377		struct event_constraint *cx;
   3378
   3379		/*
   3380		 * grab pre-allocated constraint entry
   3381		 */
   3382		cx = &cpuc->constraint_list[idx];
   3383
   3384		/*
   3385		 * initialize dynamic constraint
   3386		 * with static constraint
   3387		 */
   3388		*cx = *c;
   3389
   3390		/*
   3391		 * mark constraint as dynamic
   3392		 */
   3393		cx->flags |= PERF_X86_EVENT_DYNAMIC;
   3394		c = cx;
   3395	}
   3396
   3397	return c;
   3398}
   3399
   3400static struct event_constraint *
   3401intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
   3402			   int idx, struct event_constraint *c)
   3403{
   3404	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
   3405	struct intel_excl_states *xlo;
   3406	int tid = cpuc->excl_thread_id;
   3407	int is_excl, i, w;
   3408
   3409	/*
   3410	 * validating a group does not require
    3411	 * enforcing cross-thread exclusion
   3412	 */
   3413	if (cpuc->is_fake || !is_ht_workaround_enabled())
   3414		return c;
   3415
   3416	/*
   3417	 * no exclusion needed
   3418	 */
   3419	if (WARN_ON_ONCE(!excl_cntrs))
   3420		return c;
   3421
   3422	/*
   3423	 * because we modify the constraint, we need
   3424	 * to make a copy. Static constraints come
   3425	 * from static const tables.
   3426	 *
   3427	 * only needed when constraint has not yet
   3428	 * been cloned (marked dynamic)
   3429	 */
   3430	c = dyn_constraint(cpuc, c, idx);
   3431
   3432	/*
   3433	 * From here on, the constraint is dynamic.
   3434	 * Either it was just allocated above, or it
    3435	 * was allocated during an earlier invocation
   3436	 * of this function
   3437	 */
   3438
   3439	/*
   3440	 * state of sibling HT
   3441	 */
   3442	xlo = &excl_cntrs->states[tid ^ 1];
   3443
   3444	/*
   3445	 * event requires exclusive counter access
   3446	 * across HT threads
   3447	 */
   3448	is_excl = c->flags & PERF_X86_EVENT_EXCL;
   3449	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
   3450		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
   3451		if (!cpuc->n_excl++)
   3452			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
   3453	}
   3454
   3455	/*
   3456	 * Modify static constraint with current dynamic
   3457	 * state of thread
   3458	 *
   3459	 * EXCLUSIVE: sibling counter measuring exclusive event
   3460	 * SHARED   : sibling counter measuring non-exclusive event
   3461	 * UNUSED   : sibling counter unused
   3462	 */
   3463	w = c->weight;
   3464	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
   3465		/*
   3466		 * exclusive event in sibling counter
   3467		 * our corresponding counter cannot be used
   3468		 * regardless of our event
   3469		 */
   3470		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
   3471			__clear_bit(i, c->idxmsk);
   3472			w--;
   3473			continue;
   3474		}
   3475		/*
   3476		 * if measuring an exclusive event, sibling
   3477		 * measuring non-exclusive, then counter cannot
   3478		 * be used
   3479		 */
   3480		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
   3481			__clear_bit(i, c->idxmsk);
   3482			w--;
   3483			continue;
   3484		}
   3485	}
   3486
   3487	/*
   3488	 * if we return an empty mask, then switch
   3489	 * back to static empty constraint to avoid
   3490	 * the cost of freeing later on
   3491	 */
   3492	if (!w)
   3493		c = &emptyconstraint;
   3494
   3495	c->weight = w;
   3496
   3497	return c;
   3498}
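
/*
 * Editor's note -- worked example with hypothetical state, not part of
 * the driver: take a constraint with idxmsk = 0xf (counters 0-3,
 * weight 4) for an exclusive event, with the sibling thread in
 *
 *	xlo->state[0] = INTEL_EXCL_EXCLUSIVE
 *	xlo->state[1] = INTEL_EXCL_SHARED
 *	xlo->state[2] = INTEL_EXCL_UNUSED
 *	xlo->state[3] = INTEL_EXCL_UNUSED
 *
 * Counter 0 is dropped because the sibling runs an exclusive event
 * there, counter 1 is dropped because our event is exclusive while the
 * sibling's counter is shared, leaving idxmsk = 0xc and weight = 2.
 * If every bit were cleared, the function would return &emptyconstraint.
 */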
   3499
   3500static struct event_constraint *
   3501intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   3502			    struct perf_event *event)
   3503{
   3504	struct event_constraint *c1, *c2;
   3505
   3506	c1 = cpuc->event_constraint[idx];
   3507
   3508	/*
   3509	 * first time only
   3510	 * - static constraint: no change across incremental scheduling calls
   3511	 * - dynamic constraint: handled by intel_get_excl_constraints()
   3512	 */
   3513	c2 = __intel_get_event_constraints(cpuc, idx, event);
   3514	if (c1) {
    3515		WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
   3516		bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
   3517		c1->weight = c2->weight;
   3518		c2 = c1;
   3519	}
   3520
   3521	if (cpuc->excl_cntrs)
   3522		return intel_get_excl_constraints(cpuc, event, idx, c2);
   3523
   3524	return c2;
   3525}
   3526
   3527static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
   3528		struct perf_event *event)
   3529{
   3530	struct hw_perf_event *hwc = &event->hw;
   3531	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
   3532	int tid = cpuc->excl_thread_id;
   3533	struct intel_excl_states *xl;
   3534
   3535	/*
   3536	 * nothing needed if in group validation mode
   3537	 */
   3538	if (cpuc->is_fake)
   3539		return;
   3540
   3541	if (WARN_ON_ONCE(!excl_cntrs))
   3542		return;
   3543
   3544	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
   3545		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
   3546		if (!--cpuc->n_excl)
   3547			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
   3548	}
   3549
   3550	/*
   3551	 * If event was actually assigned, then mark the counter state as
   3552	 * unused now.
   3553	 */
   3554	if (hwc->idx >= 0) {
   3555		xl = &excl_cntrs->states[tid];
   3556
   3557		/*
   3558		 * put_constraint may be called from x86_schedule_events()
    3559		 * which already holds the lock, so make the locking
    3560		 * conditional here.
   3561		 */
   3562		if (!xl->sched_started)
   3563			raw_spin_lock(&excl_cntrs->lock);
   3564
   3565		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
   3566
   3567		if (!xl->sched_started)
   3568			raw_spin_unlock(&excl_cntrs->lock);
   3569	}
   3570}
   3571
   3572static void
   3573intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
   3574					struct perf_event *event)
   3575{
   3576	struct hw_perf_event_extra *reg;
   3577
   3578	reg = &event->hw.extra_reg;
   3579	if (reg->idx != EXTRA_REG_NONE)
   3580		__intel_shared_reg_put_constraints(cpuc, reg);
   3581
   3582	reg = &event->hw.branch_reg;
   3583	if (reg->idx != EXTRA_REG_NONE)
   3584		__intel_shared_reg_put_constraints(cpuc, reg);
   3585}
   3586
   3587static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
   3588					struct perf_event *event)
   3589{
   3590	intel_put_shared_regs_event_constraints(cpuc, event);
   3591
   3592	/*
    3593	 * if the PMU has exclusive counter restrictions, then
   3594	 * all events are subject to and must call the
   3595	 * put_excl_constraints() routine
   3596	 */
   3597	if (cpuc->excl_cntrs)
   3598		intel_put_excl_constraints(cpuc, event);
   3599}
   3600
   3601static void intel_pebs_aliases_core2(struct perf_event *event)
   3602{
   3603	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
   3604		/*
   3605		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
   3606		 * (0x003c) so that we can use it with PEBS.
   3607		 *
   3608		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
   3609		 * PEBS capable. However we can use INST_RETIRED.ANY_P
   3610		 * (0x00c0), which is a PEBS capable event, to get the same
   3611		 * count.
   3612		 *
    3613		 * INST_RETIRED.ANY_P counts the number of cycles that retire
   3614		 * CNTMASK instructions. By setting CNTMASK to a value (16)
   3615		 * larger than the maximum number of instructions that can be
   3616		 * retired per cycle (4) and then inverting the condition, we
   3617		 * count all cycles that retire 16 or less instructions, which
   3618		 * is every cycle.
   3619		 *
   3620		 * Thereby we gain a PEBS capable cycle counter.
   3621		 */
   3622		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
   3623
   3624		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
   3625		event->hw.config = alt_config;
   3626	}
   3627}
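
/*
 * Editor's note -- illustrative example, not part of the driver. With
 * the PERFEVTSEL bit layout exposed by the format attributes further
 * down in this file (event: bits 0-7, inv: bit 23, cmask: bits 24-31),
 * the alias above corresponds to the raw encoding
 *
 *	X86_CONFIG(.event=0xc0, .inv=1, .cmask=16) == 0x108000c0
 *
 * roughly what a raw event string such as cpu/event=0xc0,inv=1,cmask=16/
 * would request from user space.
 */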
   3628
   3629static void intel_pebs_aliases_snb(struct perf_event *event)
   3630{
   3631	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
   3632		/*
   3633		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
   3634		 * (0x003c) so that we can use it with PEBS.
   3635		 *
   3636		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
   3637		 * PEBS capable. However we can use UOPS_RETIRED.ALL
   3638		 * (0x01c2), which is a PEBS capable event, to get the same
   3639		 * count.
   3640		 *
    3641		 * UOPS_RETIRED.ALL counts the number of cycles that retire
   3642		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
   3643		 * larger than the maximum number of micro-ops that can be
   3644		 * retired per cycle (4) and then inverting the condition, we
   3645		 * count all cycles that retire 16 or less micro-ops, which
   3646		 * is every cycle.
   3647		 *
   3648		 * Thereby we gain a PEBS capable cycle counter.
   3649		 */
   3650		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
   3651
   3652		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
   3653		event->hw.config = alt_config;
   3654	}
   3655}
   3656
   3657static void intel_pebs_aliases_precdist(struct perf_event *event)
   3658{
   3659	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
   3660		/*
   3661		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
   3662		 * (0x003c) so that we can use it with PEBS.
   3663		 *
   3664		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
   3665		 * PEBS capable. However we can use INST_RETIRED.PREC_DIST
   3666		 * (0x01c0), which is a PEBS capable event, to get the same
   3667		 * count.
   3668		 *
   3669		 * The PREC_DIST event has special support to minimize sample
    3670		 * shadowing effects. One drawback is that it can only be
    3671		 * programmed on counter 1, but that seems like an
   3672		 * acceptable trade off.
   3673		 */
   3674		u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
   3675
   3676		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
   3677		event->hw.config = alt_config;
   3678	}
   3679}
   3680
   3681static void intel_pebs_aliases_ivb(struct perf_event *event)
   3682{
   3683	if (event->attr.precise_ip < 3)
   3684		return intel_pebs_aliases_snb(event);
   3685	return intel_pebs_aliases_precdist(event);
   3686}
   3687
   3688static void intel_pebs_aliases_skl(struct perf_event *event)
   3689{
   3690	if (event->attr.precise_ip < 3)
   3691		return intel_pebs_aliases_core2(event);
   3692	return intel_pebs_aliases_precdist(event);
   3693}
   3694
   3695static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
   3696{
   3697	unsigned long flags = x86_pmu.large_pebs_flags;
   3698
   3699	if (event->attr.use_clockid)
   3700		flags &= ~PERF_SAMPLE_TIME;
   3701	if (!event->attr.exclude_kernel)
   3702		flags &= ~PERF_SAMPLE_REGS_USER;
   3703	if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
   3704		flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
   3705	return flags;
   3706}
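
/*
 * Editor's note -- illustrative sketch, not part of the driver. As used
 * by intel_pmu_hw_config() below, large PEBS (PERF_X86_EVENT_LARGE_PEBS)
 * is only enabled for a precise event with a fixed period whose
 * sample_type is fully covered by the flags returned here, e.g. a
 * hypothetical attribute along the lines of
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_CPU_CYCLES,
 *		.precise_ip	= 2,
 *		.sample_period	= 100003,
 *		.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID,
 *	};
 *
 * whereas frequency mode (attr.freq = 1) leaves both AUTO_RELOAD and
 * LARGE_PEBS disabled.
 */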
   3707
   3708static int intel_pmu_bts_config(struct perf_event *event)
   3709{
   3710	struct perf_event_attr *attr = &event->attr;
   3711
   3712	if (unlikely(intel_pmu_has_bts(event))) {
   3713		/* BTS is not supported by this architecture. */
   3714		if (!x86_pmu.bts_active)
   3715			return -EOPNOTSUPP;
   3716
   3717		/* BTS is currently only allowed for user-mode. */
   3718		if (!attr->exclude_kernel)
   3719			return -EOPNOTSUPP;
   3720
   3721		/* BTS is not allowed for precise events. */
   3722		if (attr->precise_ip)
   3723			return -EOPNOTSUPP;
   3724
   3725		/* disallow bts if conflicting events are present */
   3726		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
   3727			return -EBUSY;
   3728
   3729		event->destroy = hw_perf_lbr_event_destroy;
   3730	}
   3731
   3732	return 0;
   3733}
   3734
   3735static int core_pmu_hw_config(struct perf_event *event)
   3736{
   3737	int ret = x86_pmu_hw_config(event);
   3738
   3739	if (ret)
   3740		return ret;
   3741
   3742	return intel_pmu_bts_config(event);
   3743}
   3744
   3745#define INTEL_TD_METRIC_AVAILABLE_MAX	(INTEL_TD_METRIC_RETIRING + \
   3746					 ((x86_pmu.num_topdown_events - 1) << 8))
   3747
   3748static bool is_available_metric_event(struct perf_event *event)
   3749{
   3750	return is_metric_event(event) &&
   3751		event->attr.config <= INTEL_TD_METRIC_AVAILABLE_MAX;
   3752}
   3753
   3754static inline bool is_mem_loads_event(struct perf_event *event)
   3755{
   3756	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xcd, .umask=0x01);
   3757}
   3758
   3759static inline bool is_mem_loads_aux_event(struct perf_event *event)
   3760{
   3761	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
   3762}
   3763
   3764static inline bool require_mem_loads_aux_event(struct perf_event *event)
   3765{
   3766	if (!(x86_pmu.flags & PMU_FL_MEM_LOADS_AUX))
   3767		return false;
   3768
   3769	if (is_hybrid())
   3770		return hybrid_pmu(event->pmu)->cpu_type == hybrid_big;
   3771
   3772	return true;
   3773}
   3774
   3775static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
   3776{
   3777	union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap);
   3778
   3779	return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
   3780}
   3781
   3782static int intel_pmu_hw_config(struct perf_event *event)
   3783{
   3784	int ret = x86_pmu_hw_config(event);
   3785
   3786	if (ret)
   3787		return ret;
   3788
   3789	ret = intel_pmu_bts_config(event);
   3790	if (ret)
   3791		return ret;
   3792
   3793	if (event->attr.precise_ip) {
   3794		if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
   3795			return -EINVAL;
   3796
   3797		if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
   3798			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
   3799			if (!(event->attr.sample_type &
   3800			      ~intel_pmu_large_pebs_flags(event))) {
   3801				event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
   3802				event->attach_state |= PERF_ATTACH_SCHED_CB;
   3803			}
   3804		}
   3805		if (x86_pmu.pebs_aliases)
   3806			x86_pmu.pebs_aliases(event);
   3807
   3808		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
   3809			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
   3810	}
   3811
   3812	if (needs_branch_stack(event)) {
   3813		ret = intel_pmu_setup_lbr_filter(event);
   3814		if (ret)
   3815			return ret;
   3816		event->attach_state |= PERF_ATTACH_SCHED_CB;
   3817
   3818		/*
   3819		 * BTS is set up earlier in this path, so don't account twice
   3820		 */
   3821		if (!unlikely(intel_pmu_has_bts(event))) {
   3822			/* disallow lbr if conflicting events are present */
   3823			if (x86_add_exclusive(x86_lbr_exclusive_lbr))
   3824				return -EBUSY;
   3825
   3826			event->destroy = hw_perf_lbr_event_destroy;
   3827		}
   3828	}
   3829
   3830	if (event->attr.aux_output) {
   3831		if (!event->attr.precise_ip)
   3832			return -EINVAL;
   3833
   3834		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
   3835	}
   3836
   3837	if ((event->attr.type == PERF_TYPE_HARDWARE) ||
   3838	    (event->attr.type == PERF_TYPE_HW_CACHE))
   3839		return 0;
   3840
   3841	/*
   3842	 * Config Topdown slots and metric events
   3843	 *
   3844	 * The slots event on Fixed Counter 3 can support sampling,
   3845	 * which will be handled normally in x86_perf_event_update().
   3846	 *
   3847	 * Metric events don't support sampling and require being paired
   3848	 * with a slots event as group leader. When the slots event
   3849	 * is used in a metrics group, it too cannot support sampling.
   3850	 */
   3851	if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
   3852		if (event->attr.config1 || event->attr.config2)
   3853			return -EINVAL;
   3854
   3855		/*
   3856		 * The TopDown metrics events and slots event don't
   3857		 * support any filters.
   3858		 */
   3859		if (event->attr.config & X86_ALL_EVENT_FLAGS)
   3860			return -EINVAL;
   3861
   3862		if (is_available_metric_event(event)) {
   3863			struct perf_event *leader = event->group_leader;
   3864
   3865			/* The metric events don't support sampling. */
   3866			if (is_sampling_event(event))
   3867				return -EINVAL;
   3868
   3869			/* The metric events require a slots group leader. */
   3870			if (!is_slots_event(leader))
   3871				return -EINVAL;
   3872
   3873			/*
   3874			 * The leader/SLOTS must not be a sampling event for
   3875			 * metric use; hardware requires it starts at 0 when used
   3876			 * in conjunction with MSR_PERF_METRICS.
   3877			 */
   3878			if (is_sampling_event(leader))
   3879				return -EINVAL;
   3880
   3881			event->event_caps |= PERF_EV_CAP_SIBLING;
   3882			/*
   3883			 * Only once we have a METRICs sibling do we
   3884			 * need TopDown magic.
   3885			 */
   3886			leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
   3887			event->hw.flags  |= PERF_X86_EVENT_TOPDOWN;
   3888		}
   3889	}
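
	/*
	 * Editor's note -- illustrative example, not part of the driver:
	 * on parts that expose the slots/topdown-* events, a valid metrics
	 * group therefore has the (non-sampling) slots event as leader and
	 * the metric events as siblings, e.g.
	 *
	 *	perf stat -e '{slots,topdown-retiring,topdown-bad-spec}' ...
	 *
	 * while a metric event opened on its own, sampled, or grouped
	 * under a sampling leader is rejected with -EINVAL above.
	 */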
   3890
   3891	/*
   3892	 * The load latency event X86_CONFIG(.event=0xcd, .umask=0x01) on SPR
   3893	 * doesn't function quite right. As a work-around it needs to always be
    3894	 * co-scheduled with an auxiliary event X86_CONFIG(.event=0x03, .umask=0x82).
    3895	 * The actual count of this second event is irrelevant; it just needs
    3896	 * to be active to make the first event function correctly.
    3897	 *
    3898	 * In a group, the auxiliary event must be in front of the load latency
    3899	 * event. This rule simplifies the implementation of the check, since
    3900	 * perf may not yet have the complete group at this point.
   3901	 */
   3902	if (require_mem_loads_aux_event(event) &&
   3903	    (event->attr.sample_type & PERF_SAMPLE_DATA_SRC) &&
   3904	    is_mem_loads_event(event)) {
   3905		struct perf_event *leader = event->group_leader;
   3906		struct perf_event *sibling = NULL;
   3907
   3908		if (!is_mem_loads_aux_event(leader)) {
   3909			for_each_sibling_event(sibling, leader) {
   3910				if (is_mem_loads_aux_event(sibling))
   3911					break;
   3912			}
   3913			if (list_entry_is_head(sibling, &leader->sibling_list, sibling_list))
   3914				return -ENODATA;
   3915		}
   3916	}
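
	/*
	 * Editor's note -- illustrative sketch, encodings taken from the
	 * helpers above: on such parts a sampled load-latency group is
	 * expected to contain
	 *
	 *	event=0x03, umask=0x82	(auxiliary event, opened first)
	 *	event=0xcd, umask=0x01	(load latency, PERF_SAMPLE_DATA_SRC)
	 *
	 * i.e. the auxiliary event must already be in the group (as leader
	 * or an earlier sibling) when the load-latency event is opened;
	 * otherwise the open fails with -ENODATA.
	 */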
   3917
   3918	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
   3919		return 0;
   3920
   3921	if (x86_pmu.version < 3)
   3922		return -EINVAL;
   3923
   3924	ret = perf_allow_cpu(&event->attr);
   3925	if (ret)
   3926		return ret;
   3927
   3928	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
   3929
   3930	return 0;
   3931}
   3932
   3933static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
   3934{
   3935	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   3936	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
   3937	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
   3938
   3939	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
   3940	arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
   3941	arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask;
   3942	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
   3943		arr[0].guest &= ~cpuc->pebs_enabled;
   3944	else
   3945		arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
   3946	*nr = 1;
   3947
   3948	if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
   3949		/*
    3950		 * If a PMU counter has PEBS enabled, it is not enough to
    3951		 * disable the counter on guest entry, since a PEBS memory
    3952		 * write can overshoot guest entry and corrupt guest
    3953		 * memory. Disabling PEBS solves the problem.
   3954		 *
   3955		 * Don't do this if the CPU already enforces it.
   3956		 */
   3957		arr[1].msr = MSR_IA32_PEBS_ENABLE;
   3958		arr[1].host = cpuc->pebs_enabled;
   3959		arr[1].guest = 0;
   3960		*nr = 2;
   3961	}
   3962
   3963	return arr;
   3964}
   3965
   3966static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
   3967{
   3968	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   3969	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
   3970	int idx;
   3971
   3972	for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
   3973		struct perf_event *event = cpuc->events[idx];
   3974
   3975		arr[idx].msr = x86_pmu_config_addr(idx);
   3976		arr[idx].host = arr[idx].guest = 0;
   3977
   3978		if (!test_bit(idx, cpuc->active_mask))
   3979			continue;
   3980
   3981		arr[idx].host = arr[idx].guest =
   3982			event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
   3983
   3984		if (event->attr.exclude_host)
   3985			arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
   3986		else if (event->attr.exclude_guest)
   3987			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
   3988	}
   3989
   3990	*nr = x86_pmu.num_counters;
   3991	return arr;
   3992}
   3993
   3994static void core_pmu_enable_event(struct perf_event *event)
   3995{
   3996	if (!event->attr.exclude_host)
   3997		x86_pmu_enable_event(event);
   3998}
   3999
   4000static void core_pmu_enable_all(int added)
   4001{
   4002	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   4003	int idx;
   4004
   4005	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
   4006		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
   4007
   4008		if (!test_bit(idx, cpuc->active_mask) ||
   4009				cpuc->events[idx]->attr.exclude_host)
   4010			continue;
   4011
   4012		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
   4013	}
   4014}
   4015
   4016static int hsw_hw_config(struct perf_event *event)
   4017{
   4018	int ret = intel_pmu_hw_config(event);
   4019
   4020	if (ret)
   4021		return ret;
   4022	if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
   4023		return 0;
   4024	event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
   4025
   4026	/*
   4027	 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
    4028	 * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
   4029	 * this combination.
   4030	 */
   4031	if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
   4032	     ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
   4033	      event->attr.precise_ip > 0))
   4034		return -EOPNOTSUPP;
   4035
   4036	if (event_is_checkpointed(event)) {
   4037		/*
   4038		 * Sampling of checkpointed events can cause situations where
    4039		 * the CPU constantly aborts because of an overflow, which is
   4040		 * then checkpointed back and ignored. Forbid checkpointing
   4041		 * for sampling.
   4042		 *
   4043		 * But still allow a long sampling period, so that perf stat
   4044		 * from KVM works.
   4045		 */
   4046		if (event->attr.sample_period > 0 &&
   4047		    event->attr.sample_period < 0x7fffffff)
   4048			return -EOPNOTSUPP;
   4049	}
   4050	return 0;
   4051}
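
/*
 * Editor's note -- illustrative only: per the checkpointing check above,
 * an in_tx_cp event may be counted (sample_period == 0) or sampled with
 * a very long period (>= 0x7fffffff), while an ordinary sampling period
 * such as 100000 is rejected with -EOPNOTSUPP.
 */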
   4052
   4053static struct event_constraint counter0_constraint =
   4054			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
   4055
   4056static struct event_constraint counter2_constraint =
   4057			EVENT_CONSTRAINT(0, 0x4, 0);
   4058
   4059static struct event_constraint fixed0_constraint =
   4060			FIXED_EVENT_CONSTRAINT(0x00c0, 0);
   4061
   4062static struct event_constraint fixed0_counter0_constraint =
   4063			INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
   4064
   4065static struct event_constraint *
   4066hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4067			  struct perf_event *event)
   4068{
   4069	struct event_constraint *c;
   4070
   4071	c = intel_get_event_constraints(cpuc, idx, event);
   4072
   4073	/* Handle special quirk on in_tx_checkpointed only in counter 2 */
   4074	if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
   4075		if (c->idxmsk64 & (1U << 2))
   4076			return &counter2_constraint;
   4077		return &emptyconstraint;
   4078	}
   4079
   4080	return c;
   4081}
   4082
   4083static struct event_constraint *
   4084icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4085			  struct perf_event *event)
   4086{
   4087	/*
   4088	 * Fixed counter 0 has less skid.
   4089	 * Force instruction:ppp in Fixed counter 0
   4090	 */
   4091	if ((event->attr.precise_ip == 3) &&
   4092	    constraint_match(&fixed0_constraint, event->hw.config))
   4093		return &fixed0_constraint;
   4094
   4095	return hsw_get_event_constraints(cpuc, idx, event);
   4096}
   4097
   4098static struct event_constraint *
   4099spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4100			  struct perf_event *event)
   4101{
   4102	struct event_constraint *c;
   4103
   4104	c = icl_get_event_constraints(cpuc, idx, event);
   4105
   4106	/*
   4107	 * The :ppp indicates the Precise Distribution (PDist) facility, which
    4108	 * is only supported on GP counter 0. If a :ppp event cannot be
    4109	 * scheduled on GP counter 0, error out.
   4110	 * Exception: Instruction PDIR is only available on the fixed counter 0.
   4111	 */
   4112	if ((event->attr.precise_ip == 3) &&
   4113	    !constraint_match(&fixed0_constraint, event->hw.config)) {
   4114		if (c->idxmsk64 & BIT_ULL(0))
   4115			return &counter0_constraint;
   4116
   4117		return &emptyconstraint;
   4118	}
   4119
   4120	return c;
   4121}
   4122
   4123static struct event_constraint *
   4124glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4125			  struct perf_event *event)
   4126{
   4127	struct event_constraint *c;
   4128
   4129	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
   4130	if (event->attr.precise_ip == 3)
   4131		return &counter0_constraint;
   4132
   4133	c = intel_get_event_constraints(cpuc, idx, event);
   4134
   4135	return c;
   4136}
   4137
   4138static struct event_constraint *
   4139tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4140			  struct perf_event *event)
   4141{
   4142	struct event_constraint *c;
   4143
   4144	/*
   4145	 * :ppp means to do reduced skid PEBS,
   4146	 * which is available on PMC0 and fixed counter 0.
   4147	 */
   4148	if (event->attr.precise_ip == 3) {
   4149		/* Force instruction:ppp on PMC0 and Fixed counter 0 */
   4150		if (constraint_match(&fixed0_constraint, event->hw.config))
   4151			return &fixed0_counter0_constraint;
   4152
   4153		return &counter0_constraint;
   4154	}
   4155
   4156	c = intel_get_event_constraints(cpuc, idx, event);
   4157
   4158	return c;
   4159}
   4160
   4161static bool allow_tsx_force_abort = true;
   4162
   4163static struct event_constraint *
   4164tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4165			  struct perf_event *event)
   4166{
   4167	struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
   4168
   4169	/*
   4170	 * Without TFA we must not use PMC3.
   4171	 */
   4172	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
   4173		c = dyn_constraint(cpuc, c, idx);
   4174		c->idxmsk64 &= ~(1ULL << 3);
   4175		c->weight--;
   4176	}
   4177
   4178	return c;
   4179}
   4180
   4181static struct event_constraint *
   4182adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
   4183			  struct perf_event *event)
   4184{
   4185	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
   4186
   4187	if (pmu->cpu_type == hybrid_big)
   4188		return spr_get_event_constraints(cpuc, idx, event);
   4189	else if (pmu->cpu_type == hybrid_small)
   4190		return tnt_get_event_constraints(cpuc, idx, event);
   4191
   4192	WARN_ON(1);
   4193	return &emptyconstraint;
   4194}
   4195
   4196static int adl_hw_config(struct perf_event *event)
   4197{
   4198	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
   4199
   4200	if (pmu->cpu_type == hybrid_big)
   4201		return hsw_hw_config(event);
   4202	else if (pmu->cpu_type == hybrid_small)
   4203		return intel_pmu_hw_config(event);
   4204
   4205	WARN_ON(1);
   4206	return -EOPNOTSUPP;
   4207}
   4208
   4209static u8 adl_get_hybrid_cpu_type(void)
   4210{
   4211	return hybrid_big;
   4212}
   4213
   4214/*
   4215 * Broadwell:
   4216 *
   4217 * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
   4218 * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
   4219 * the two to enforce a minimum period of 128 (the smallest value that has bits
   4220 * 0-5 cleared and >= 100).
   4221 *
   4222 * Because of how the code in x86_perf_event_set_period() works, the truncation
   4223 * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
   4224 * to make up for the 'lost' events due to carrying the 'error' in period_left.
   4225 *
   4226 * Therefore the effective (average) period matches the requested period,
   4227 * despite coarser hardware granularity.
   4228 */
   4229static u64 bdw_limit_period(struct perf_event *event, u64 left)
   4230{
   4231	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
   4232			X86_CONFIG(.event=0xc0, .umask=0x01)) {
   4233		if (left < 128)
   4234			left = 128;
   4235		left &= ~0x3fULL;
   4236	}
   4237	return left;
   4238}
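
/*
 * Editor's note -- worked example: a requested period of 100 is first
 * raised to 128 and then masked to 128 (0x80); a requested period of 200
 * is truncated to 200 & ~0x3f = 192. Both results have bits 0-5 clear
 * and are >= 100, satisfying BDM55 and BDM11.
 */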
   4239
   4240static u64 nhm_limit_period(struct perf_event *event, u64 left)
   4241{
   4242	return max(left, 32ULL);
   4243}
   4244
   4245static u64 spr_limit_period(struct perf_event *event, u64 left)
   4246{
   4247	if (event->attr.precise_ip == 3)
   4248		return max(left, 128ULL);
   4249
   4250	return left;
   4251}
   4252
   4253PMU_FORMAT_ATTR(event,	"config:0-7"	);
   4254PMU_FORMAT_ATTR(umask,	"config:8-15"	);
   4255PMU_FORMAT_ATTR(edge,	"config:18"	);
   4256PMU_FORMAT_ATTR(pc,	"config:19"	);
   4257PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
   4258PMU_FORMAT_ATTR(inv,	"config:23"	);
   4259PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
   4260PMU_FORMAT_ATTR(in_tx,  "config:32");
   4261PMU_FORMAT_ATTR(in_tx_cp, "config:33");
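
/*
 * Editor's note -- illustrative example, not part of the driver. These
 * format attributes are exported through sysfs and used by the perf tool
 * to turn a raw event string into perf_event_attr::config; e.g. the
 * (hypothetical) event string
 *
 *	cpu/event=0x3c,umask=0x00,any=1/
 *
 * becomes config = 0x3c | (0x00 << 8) | (1 << 21) = 0x20003c, matching
 * the event/umask/any bit ranges declared above.
 */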
   4262
   4263static struct attribute *intel_arch_formats_attr[] = {
   4264	&format_attr_event.attr,
   4265	&format_attr_umask.attr,
   4266	&format_attr_edge.attr,
   4267	&format_attr_pc.attr,
   4268	&format_attr_inv.attr,
   4269	&format_attr_cmask.attr,
   4270	NULL,
   4271};
   4272
   4273ssize_t intel_event_sysfs_show(char *page, u64 config)
   4274{
   4275	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
   4276
   4277	return x86_event_sysfs_show(page, config, event);
   4278}
   4279
   4280static struct intel_shared_regs *allocate_shared_regs(int cpu)
   4281{
   4282	struct intel_shared_regs *regs;
   4283	int i;
   4284
   4285	regs = kzalloc_node(sizeof(struct intel_shared_regs),
   4286			    GFP_KERNEL, cpu_to_node(cpu));
   4287	if (regs) {
   4288		/*
   4289		 * initialize the locks to keep lockdep happy
   4290		 */
   4291		for (i = 0; i < EXTRA_REG_MAX; i++)
   4292			raw_spin_lock_init(&regs->regs[i].lock);
   4293
   4294		regs->core_id = -1;
   4295	}
   4296	return regs;
   4297}
   4298
   4299static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
   4300{
   4301	struct intel_excl_cntrs *c;
   4302
   4303	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
   4304			 GFP_KERNEL, cpu_to_node(cpu));
   4305	if (c) {
   4306		raw_spin_lock_init(&c->lock);
   4307		c->core_id = -1;
   4308	}
   4309	return c;
   4310}
   4311
   4312
   4313int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
   4314{
   4315	cpuc->pebs_record_size = x86_pmu.pebs_record_size;
   4316
   4317	if (is_hybrid() || x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
   4318		cpuc->shared_regs = allocate_shared_regs(cpu);
   4319		if (!cpuc->shared_regs)
   4320			goto err;
   4321	}
   4322
   4323	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
   4324		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
   4325
   4326		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
   4327		if (!cpuc->constraint_list)
   4328			goto err_shared_regs;
   4329	}
   4330
   4331	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
   4332		cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
   4333		if (!cpuc->excl_cntrs)
   4334			goto err_constraint_list;
   4335
   4336		cpuc->excl_thread_id = 0;
   4337	}
   4338
   4339	return 0;
   4340
   4341err_constraint_list:
   4342	kfree(cpuc->constraint_list);
   4343	cpuc->constraint_list = NULL;
   4344
   4345err_shared_regs:
   4346	kfree(cpuc->shared_regs);
   4347	cpuc->shared_regs = NULL;
   4348
   4349err:
   4350	return -ENOMEM;
   4351}
   4352
   4353static int intel_pmu_cpu_prepare(int cpu)
   4354{
   4355	return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
   4356}
   4357
   4358static void flip_smm_bit(void *data)
   4359{
   4360	unsigned long set = *(unsigned long *)data;
   4361
   4362	if (set > 0) {
   4363		msr_set_bit(MSR_IA32_DEBUGCTLMSR,
   4364			    DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
   4365	} else {
   4366		msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
   4367			      DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
   4368	}
   4369}
   4370
   4371static bool init_hybrid_pmu(int cpu)
   4372{
   4373	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
   4374	u8 cpu_type = get_this_hybrid_cpu_type();
   4375	struct x86_hybrid_pmu *pmu = NULL;
   4376	int i;
   4377
   4378	if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
   4379		cpu_type = x86_pmu.get_hybrid_cpu_type();
   4380
   4381	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
   4382		if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) {
   4383			pmu = &x86_pmu.hybrid_pmu[i];
   4384			break;
   4385		}
   4386	}
   4387	if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) {
   4388		cpuc->pmu = NULL;
   4389		return false;
   4390	}
   4391
   4392	/* Only check and dump the PMU information for the first CPU */
   4393	if (!cpumask_empty(&pmu->supported_cpus))
   4394		goto end;
   4395
   4396	if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
   4397		return false;
   4398
   4399	pr_info("%s PMU driver: ", pmu->name);
   4400
   4401	if (pmu->intel_cap.pebs_output_pt_available)
   4402		pr_cont("PEBS-via-PT ");
   4403
   4404	pr_cont("\n");
   4405
   4406	x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed,
   4407			     pmu->intel_ctrl);
   4408
   4409end:
   4410	cpumask_set_cpu(cpu, &pmu->supported_cpus);
   4411	cpuc->pmu = &pmu->pmu;
   4412
   4413	x86_pmu_update_cpu_context(&pmu->pmu, cpu);
   4414
   4415	return true;
   4416}
   4417
   4418static void intel_pmu_cpu_starting(int cpu)
   4419{
   4420	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
   4421	int core_id = topology_core_id(cpu);
   4422	int i;
   4423
   4424	if (is_hybrid() && !init_hybrid_pmu(cpu))
   4425		return;
   4426
   4427	init_debug_store_on_cpu(cpu);
   4428	/*
   4429	 * Deal with CPUs that don't clear their LBRs on power-up.
   4430	 */
   4431	intel_pmu_lbr_reset();
   4432
   4433	cpuc->lbr_sel = NULL;
   4434
   4435	if (x86_pmu.flags & PMU_FL_TFA) {
   4436		WARN_ON_ONCE(cpuc->tfa_shadow);
   4437		cpuc->tfa_shadow = ~0ULL;
   4438		intel_set_tfa(cpuc, false);
   4439	}
   4440
   4441	if (x86_pmu.version > 1)
   4442		flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
   4443
   4444	/*
   4445	 * Disable perf metrics if any added CPU doesn't support it.
   4446	 *
   4447	 * Turn off the check for a hybrid architecture, because the
    4448	 * architectural MSR, MSR_IA32_PERF_CAPABILITIES, only indicates
    4449	 * the architectural features. Perf metrics is a model-specific
    4450	 * feature for now. The corresponding bit should always be 0 on
   4451	 * a hybrid platform, e.g., Alder Lake.
   4452	 */
   4453	if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
   4454		union perf_capabilities perf_cap;
   4455
   4456		rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
   4457		if (!perf_cap.perf_metrics) {
   4458			x86_pmu.intel_cap.perf_metrics = 0;
   4459			x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
   4460		}
   4461	}
   4462
   4463	if (!cpuc->shared_regs)
   4464		return;
   4465
   4466	if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
   4467		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
   4468			struct intel_shared_regs *pc;
   4469
   4470			pc = per_cpu(cpu_hw_events, i).shared_regs;
   4471			if (pc && pc->core_id == core_id) {
   4472				cpuc->kfree_on_online[0] = cpuc->shared_regs;
   4473				cpuc->shared_regs = pc;
   4474				break;
   4475			}
   4476		}
   4477		cpuc->shared_regs->core_id = core_id;
   4478		cpuc->shared_regs->refcnt++;
   4479	}
   4480
   4481	if (x86_pmu.lbr_sel_map)
   4482		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
   4483
   4484	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
   4485		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
   4486			struct cpu_hw_events *sibling;
   4487			struct intel_excl_cntrs *c;
   4488
   4489			sibling = &per_cpu(cpu_hw_events, i);
   4490			c = sibling->excl_cntrs;
   4491			if (c && c->core_id == core_id) {
   4492				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
   4493				cpuc->excl_cntrs = c;
   4494				if (!sibling->excl_thread_id)
   4495					cpuc->excl_thread_id = 1;
   4496				break;
   4497			}
   4498		}
   4499		cpuc->excl_cntrs->core_id = core_id;
   4500		cpuc->excl_cntrs->refcnt++;
   4501	}
   4502}
   4503
   4504static void free_excl_cntrs(struct cpu_hw_events *cpuc)
   4505{
   4506	struct intel_excl_cntrs *c;
   4507
   4508	c = cpuc->excl_cntrs;
   4509	if (c) {
   4510		if (c->core_id == -1 || --c->refcnt == 0)
   4511			kfree(c);
   4512		cpuc->excl_cntrs = NULL;
   4513	}
   4514
   4515	kfree(cpuc->constraint_list);
   4516	cpuc->constraint_list = NULL;
   4517}
   4518
   4519static void intel_pmu_cpu_dying(int cpu)
   4520{
   4521	fini_debug_store_on_cpu(cpu);
   4522}
   4523
   4524void intel_cpuc_finish(struct cpu_hw_events *cpuc)
   4525{
   4526	struct intel_shared_regs *pc;
   4527
   4528	pc = cpuc->shared_regs;
   4529	if (pc) {
   4530		if (pc->core_id == -1 || --pc->refcnt == 0)
   4531			kfree(pc);
   4532		cpuc->shared_regs = NULL;
   4533	}
   4534
   4535	free_excl_cntrs(cpuc);
   4536}
   4537
   4538static void intel_pmu_cpu_dead(int cpu)
   4539{
   4540	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
   4541
   4542	intel_cpuc_finish(cpuc);
   4543
   4544	if (is_hybrid() && cpuc->pmu)
   4545		cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
   4546}
   4547
   4548static void intel_pmu_sched_task(struct perf_event_context *ctx,
   4549				 bool sched_in)
   4550{
   4551	intel_pmu_pebs_sched_task(ctx, sched_in);
   4552	intel_pmu_lbr_sched_task(ctx, sched_in);
   4553}
   4554
   4555static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
   4556				    struct perf_event_context *next)
   4557{
   4558	intel_pmu_lbr_swap_task_ctx(prev, next);
   4559}
   4560
   4561static int intel_pmu_check_period(struct perf_event *event, u64 value)
   4562{
   4563	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
   4564}
   4565
   4566static void intel_aux_output_init(void)
   4567{
   4568	/* Refer also intel_pmu_aux_output_match() */
   4569	if (x86_pmu.intel_cap.pebs_output_pt_available)
   4570		x86_pmu.assign = intel_pmu_assign_event;
   4571}
   4572
   4573static int intel_pmu_aux_output_match(struct perf_event *event)
   4574{
   4575	/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
   4576	if (!x86_pmu.intel_cap.pebs_output_pt_available)
   4577		return 0;
   4578
   4579	return is_intel_pt_event(event);
   4580}
   4581
   4582static int intel_pmu_filter_match(struct perf_event *event)
   4583{
   4584	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
   4585	unsigned int cpu = smp_processor_id();
   4586
   4587	return cpumask_test_cpu(cpu, &pmu->supported_cpus);
   4588}
   4589
   4590PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
   4591
   4592PMU_FORMAT_ATTR(ldlat, "config1:0-15");
   4593
   4594PMU_FORMAT_ATTR(frontend, "config1:0-23");
   4595
   4596static struct attribute *intel_arch3_formats_attr[] = {
   4597	&format_attr_event.attr,
   4598	&format_attr_umask.attr,
   4599	&format_attr_edge.attr,
   4600	&format_attr_pc.attr,
   4601	&format_attr_any.attr,
   4602	&format_attr_inv.attr,
   4603	&format_attr_cmask.attr,
   4604	NULL,
   4605};
   4606
   4607static struct attribute *hsw_format_attr[] = {
   4608	&format_attr_in_tx.attr,
   4609	&format_attr_in_tx_cp.attr,
   4610	&format_attr_offcore_rsp.attr,
   4611	&format_attr_ldlat.attr,
   4612	NULL
   4613};
   4614
   4615static struct attribute *nhm_format_attr[] = {
   4616	&format_attr_offcore_rsp.attr,
   4617	&format_attr_ldlat.attr,
   4618	NULL
   4619};
   4620
   4621static struct attribute *slm_format_attr[] = {
   4622	&format_attr_offcore_rsp.attr,
   4623	NULL
   4624};
   4625
   4626static struct attribute *skl_format_attr[] = {
   4627	&format_attr_frontend.attr,
   4628	NULL,
   4629};
   4630
   4631static __initconst const struct x86_pmu core_pmu = {
   4632	.name			= "core",
   4633	.handle_irq		= x86_pmu_handle_irq,
   4634	.disable_all		= x86_pmu_disable_all,
   4635	.enable_all		= core_pmu_enable_all,
   4636	.enable			= core_pmu_enable_event,
   4637	.disable		= x86_pmu_disable_event,
   4638	.hw_config		= core_pmu_hw_config,
   4639	.schedule_events	= x86_schedule_events,
   4640	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
   4641	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
   4642	.event_map		= intel_pmu_event_map,
   4643	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
   4644	.apic			= 1,
   4645	.large_pebs_flags	= LARGE_PEBS_FLAGS,
   4646
   4647	/*
   4648	 * Intel PMCs cannot be accessed sanely above 32-bit width,
   4649	 * so we install an artificial 1<<31 period regardless of
   4650	 * the generic event period:
   4651	 */
   4652	.max_period		= (1ULL<<31) - 1,
   4653	.get_event_constraints	= intel_get_event_constraints,
   4654	.put_event_constraints	= intel_put_event_constraints,
   4655	.event_constraints	= intel_core_event_constraints,
   4656	.guest_get_msrs		= core_guest_get_msrs,
   4657	.format_attrs		= intel_arch_formats_attr,
   4658	.events_sysfs_show	= intel_event_sysfs_show,
   4659
   4660	/*
   4661	 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
   4662	 * together with PMU version 1 and thus be using core_pmu with
    4663	 * shared_regs. We need the following callbacks here to allocate
   4664	 * it properly.
   4665	 */
   4666	.cpu_prepare		= intel_pmu_cpu_prepare,
   4667	.cpu_starting		= intel_pmu_cpu_starting,
   4668	.cpu_dying		= intel_pmu_cpu_dying,
   4669	.cpu_dead		= intel_pmu_cpu_dead,
   4670
   4671	.check_period		= intel_pmu_check_period,
   4672
   4673	.lbr_reset		= intel_pmu_lbr_reset_64,
   4674	.lbr_read		= intel_pmu_lbr_read_64,
   4675	.lbr_save		= intel_pmu_lbr_save,
   4676	.lbr_restore		= intel_pmu_lbr_restore,
   4677};
   4678
   4679static __initconst const struct x86_pmu intel_pmu = {
   4680	.name			= "Intel",
   4681	.handle_irq		= intel_pmu_handle_irq,
   4682	.disable_all		= intel_pmu_disable_all,
   4683	.enable_all		= intel_pmu_enable_all,
   4684	.enable			= intel_pmu_enable_event,
   4685	.disable		= intel_pmu_disable_event,
   4686	.add			= intel_pmu_add_event,
   4687	.del			= intel_pmu_del_event,
   4688	.read			= intel_pmu_read_event,
   4689	.hw_config		= intel_pmu_hw_config,
   4690	.schedule_events	= x86_schedule_events,
   4691	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
   4692	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
   4693	.event_map		= intel_pmu_event_map,
   4694	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
   4695	.apic			= 1,
   4696	.large_pebs_flags	= LARGE_PEBS_FLAGS,
   4697	/*
   4698	 * Intel PMCs cannot be accessed sanely above 32 bit width,
   4699	 * so we install an artificial 1<<31 period regardless of
   4700	 * the generic event period:
   4701	 */
   4702	.max_period		= (1ULL << 31) - 1,
   4703	.get_event_constraints	= intel_get_event_constraints,
   4704	.put_event_constraints	= intel_put_event_constraints,
   4705	.pebs_aliases		= intel_pebs_aliases_core2,
   4706
   4707	.format_attrs		= intel_arch3_formats_attr,
   4708	.events_sysfs_show	= intel_event_sysfs_show,
   4709
   4710	.cpu_prepare		= intel_pmu_cpu_prepare,
   4711	.cpu_starting		= intel_pmu_cpu_starting,
   4712	.cpu_dying		= intel_pmu_cpu_dying,
   4713	.cpu_dead		= intel_pmu_cpu_dead,
   4714
   4715	.guest_get_msrs		= intel_guest_get_msrs,
   4716	.sched_task		= intel_pmu_sched_task,
   4717	.swap_task_ctx		= intel_pmu_swap_task_ctx,
   4718
   4719	.check_period		= intel_pmu_check_period,
   4720
   4721	.aux_output_match	= intel_pmu_aux_output_match,
   4722
   4723	.lbr_reset		= intel_pmu_lbr_reset_64,
   4724	.lbr_read		= intel_pmu_lbr_read_64,
   4725	.lbr_save		= intel_pmu_lbr_save,
   4726	.lbr_restore		= intel_pmu_lbr_restore,
   4727
   4728	/*
   4729	 * SMM has access to all 4 rings and while traditionally SMM code only
   4730	 * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
   4731	 *
   4732	 * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
   4733	 * between SMM or not, this results in what should be pure userspace
   4734	 * counters including SMM data.
   4735	 *
   4736	 * This is a clear privilege issue, therefore globally disable
   4737	 * counting SMM by default.
   4738	 */
   4739	.attr_freeze_on_smi	= 1,
   4740};
   4741
   4742static __init void intel_clovertown_quirk(void)
   4743{
   4744	/*
   4745	 * PEBS is unreliable due to:
   4746	 *
   4747	 *   AJ67  - PEBS may experience CPL leaks
   4748	 *   AJ68  - PEBS PMI may be delayed by one event
   4749	 *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
   4750	 *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
   4751	 *
   4752	 * AJ67 could be worked around by restricting the OS/USR flags.
   4753	 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
   4754	 *
   4755	 * AJ106 could possibly be worked around by not allowing LBR
   4756	 *       usage from PEBS, including the fixup.
   4757	 * AJ68  could possibly be worked around by always programming
   4758	 *	 a pebs_event_reset[0] value and coping with the lost events.
   4759	 *
   4760	 * But taken together it might just make sense to not enable PEBS on
   4761	 * these chips.
   4762	 */
   4763	pr_warn("PEBS disabled due to CPU errata\n");
   4764	x86_pmu.pebs = 0;
   4765	x86_pmu.pebs_constraints = NULL;
   4766}
   4767
   4768static const struct x86_cpu_desc isolation_ucodes[] = {
   4769	INTEL_CPU_DESC(INTEL_FAM6_HASWELL,		 3, 0x0000001f),
   4770	INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L,		 1, 0x0000001e),
   4771	INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G,		 1, 0x00000015),
   4772	INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,		 2, 0x00000037),
   4773	INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,		 4, 0x0000000a),
   4774	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL,		 4, 0x00000023),
   4775	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G,		 1, 0x00000014),
   4776	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,		 2, 0x00000010),
   4777	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,		 3, 0x07000009),
   4778	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,		 4, 0x0f000009),
   4779	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D,		 5, 0x0e000002),
   4780	INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X,		 1, 0x0b000014),
   4781	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 3, 0x00000021),
   4782	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 4, 0x00000000),
   4783	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 5, 0x00000000),
   4784	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 6, 0x00000000),
   4785	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,		 7, 0x00000000),
   4786	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L,		 3, 0x0000007c),
   4787	INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE,		 3, 0x0000007c),
   4788	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,		 9, 0x0000004e),
   4789	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,		 9, 0x0000004e),
   4790	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,		10, 0x0000004e),
   4791	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,		11, 0x0000004e),
   4792	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L,		12, 0x0000004e),
   4793	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,		10, 0x0000004e),
   4794	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,		11, 0x0000004e),
   4795	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,		12, 0x0000004e),
   4796	INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,		13, 0x0000004e),
   4797	{}
   4798};
   4799
   4800static void intel_check_pebs_isolation(void)
   4801{
   4802	x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
   4803}
   4804
   4805static __init void intel_pebs_isolation_quirk(void)
   4806{
   4807	WARN_ON_ONCE(x86_pmu.check_microcode);
   4808	x86_pmu.check_microcode = intel_check_pebs_isolation;
   4809	intel_check_pebs_isolation();
   4810}
   4811
   4812static const struct x86_cpu_desc pebs_ucodes[] = {
   4813	INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE,		7, 0x00000028),
   4814	INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X,	6, 0x00000618),
   4815	INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X,	7, 0x0000070c),
   4816	{}
   4817};
   4818
   4819static bool intel_snb_pebs_broken(void)
   4820{
   4821	return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
   4822}
   4823
   4824static void intel_snb_check_microcode(void)
   4825{
   4826	if (intel_snb_pebs_broken() == x86_pmu.pebs_broken)
   4827		return;
   4828
   4829	/*
    4830	 * Serialized by the microcode lock.
   4831	 */
   4832	if (x86_pmu.pebs_broken) {
   4833		pr_info("PEBS enabled due to microcode update\n");
   4834		x86_pmu.pebs_broken = 0;
   4835	} else {
   4836		pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
   4837		x86_pmu.pebs_broken = 1;
   4838	}
   4839}
   4840
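        /* Return true if @msr is one of the LBR "FROM" MSRs: [lbr_from, lbr_from + lbr_nr). */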
   4841static bool is_lbr_from(unsigned long msr)
   4842{
   4843	unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
   4844
   4845	return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
   4846}
   4847
   4848/*
    4849 * Under certain circumstances, accessing certain MSRs may cause a #GP.
    4850 * This function tests whether the given MSR can be safely accessed.
   4851 */
   4852static bool check_msr(unsigned long msr, u64 mask)
   4853{
   4854	u64 val_old, val_new, val_tmp;
   4855
   4856	/*
   4857	 * Disable the check for real HW, so we don't
   4858	 * mess with potentially enabled registers:
   4859	 */
   4860	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
   4861		return true;
   4862
   4863	/*
   4864	 * Read the current value, change it and read it back to see if it
   4865	 * matches, this is needed to detect certain hardware emulators
   4866	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
   4867	 */
   4868	if (rdmsrl_safe(msr, &val_old))
   4869		return false;
   4870
   4871	/*
   4872	 * Only change the bits which can be updated by wrmsrl.
   4873	 */
   4874	val_tmp = val_old ^ mask;
   4875
   4876	if (is_lbr_from(msr))
   4877		val_tmp = lbr_from_signext_quirk_wr(val_tmp);
   4878
   4879	if (wrmsrl_safe(msr, val_tmp) ||
   4880	    rdmsrl_safe(msr, &val_new))
   4881		return false;
   4882
   4883	/*
   4884	 * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
   4885	 * should equal rdmsrl()'s even with the quirk.
   4886	 */
   4887	if (val_new != val_tmp)
   4888		return false;
   4889
   4890	if (is_lbr_from(msr))
   4891		val_old = lbr_from_signext_quirk_wr(val_old);
   4892
    4893	/* At this point it is certain that the MSR can be safely accessed.
    4894	 * Restore the old value and return.
    4895	 */
   4896	wrmsrl(msr, val_old);
   4897
   4898	return true;
   4899}
   4900
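        /*
         * Install the SNB/SNB-X PEBS microcode check as the check_microcode
         * hook and run it once, serialized against CPU hotplug.
         */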
   4901static __init void intel_sandybridge_quirk(void)
   4902{
   4903	x86_pmu.check_microcode = intel_snb_check_microcode;
   4904	cpus_read_lock();
   4905	intel_snb_check_microcode();
   4906	cpus_read_unlock();
   4907}
   4908
   4909static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
   4910	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
   4911	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
   4912	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
   4913	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
   4914	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
   4915	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
   4916	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
   4917};
   4918
   4919static __init void intel_arch_events_quirk(void)
   4920{
   4921	int bit;
   4922
    4923	/* Disable events reported as not present by CPUID. */
   4924	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
   4925		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
    4926		pr_warn("CPUID marked event: '%s' unavailable\n",
   4927			intel_arch_events_map[bit].name);
   4928	}
   4929}
   4930
   4931static __init void intel_nehalem_quirk(void)
   4932{
   4933	union cpuid10_ebx ebx;
   4934
   4935	ebx.full = x86_pmu.events_maskl;
   4936	if (ebx.split.no_branch_misses_retired) {
   4937		/*
   4938		 * Erratum AAJ80 detected, we work it around by using
   4939		 * the BR_MISP_EXEC.ANY event. This will over-count
   4940		 * branch-misses, but it's still much better than the
   4941		 * architectural event which is often completely bogus:
   4942		 */
   4943		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
   4944		ebx.split.no_branch_misses_retired = 0;
   4945		x86_pmu.events_maskl = ebx.full;
   4946		pr_info("CPU erratum AAJ80 worked around\n");
   4947	}
   4948}
   4949
   4950/*
    4951 * Enable the software workaround for the following errata:
    4952 * SNB: BJ122
    4953 * IVB: BV98
    4954 * HSW: HSD29
    4955 *
    4956 * Only needed when HT is enabled. However, detecting whether HT is
    4957 * enabled is difficult (model specific). So instead, we enable the
    4958 * workaround during early boot and verify whether it is needed in a
    4959 * later initcall phase, once valid topology information is available
    4960 * to check if HT is actually enabled.
   4961 */
   4962static __init void intel_ht_bug(void)
   4963{
   4964	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
   4965
   4966	x86_pmu.start_scheduling = intel_start_scheduling;
   4967	x86_pmu.commit_scheduling = intel_commit_scheduling;
   4968	x86_pmu.stop_scheduling = intel_stop_scheduling;
   4969}
   4970
   4971EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
   4972EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
   4973
   4974/* Haswell special events */
   4975EVENT_ATTR_STR(tx-start,	tx_start,	"event=0xc9,umask=0x1");
   4976EVENT_ATTR_STR(tx-commit,	tx_commit,	"event=0xc9,umask=0x2");
   4977EVENT_ATTR_STR(tx-abort,	tx_abort,	"event=0xc9,umask=0x4");
   4978EVENT_ATTR_STR(tx-capacity,	tx_capacity,	"event=0x54,umask=0x2");
   4979EVENT_ATTR_STR(tx-conflict,	tx_conflict,	"event=0x54,umask=0x1");
   4980EVENT_ATTR_STR(el-start,	el_start,	"event=0xc8,umask=0x1");
   4981EVENT_ATTR_STR(el-commit,	el_commit,	"event=0xc8,umask=0x2");
   4982EVENT_ATTR_STR(el-abort,	el_abort,	"event=0xc8,umask=0x4");
   4983EVENT_ATTR_STR(el-capacity,	el_capacity,	"event=0x54,umask=0x2");
   4984EVENT_ATTR_STR(el-conflict,	el_conflict,	"event=0x54,umask=0x1");
   4985EVENT_ATTR_STR(cycles-t,	cycles_t,	"event=0x3c,in_tx=1");
   4986EVENT_ATTR_STR(cycles-ct,	cycles_ct,	"event=0x3c,in_tx=1,in_tx_cp=1");
   4987
   4988static struct attribute *hsw_events_attrs[] = {
   4989	EVENT_PTR(td_slots_issued),
   4990	EVENT_PTR(td_slots_retired),
   4991	EVENT_PTR(td_fetch_bubbles),
   4992	EVENT_PTR(td_total_slots),
   4993	EVENT_PTR(td_total_slots_scale),
   4994	EVENT_PTR(td_recovery_bubbles),
   4995	EVENT_PTR(td_recovery_bubbles_scale),
   4996	NULL
   4997};
   4998
   4999static struct attribute *hsw_mem_events_attrs[] = {
   5000	EVENT_PTR(mem_ld_hsw),
   5001	EVENT_PTR(mem_st_hsw),
   5002	NULL,
   5003};
   5004
   5005static struct attribute *hsw_tsx_events_attrs[] = {
   5006	EVENT_PTR(tx_start),
   5007	EVENT_PTR(tx_commit),
   5008	EVENT_PTR(tx_abort),
   5009	EVENT_PTR(tx_capacity),
   5010	EVENT_PTR(tx_conflict),
   5011	EVENT_PTR(el_start),
   5012	EVENT_PTR(el_commit),
   5013	EVENT_PTR(el_abort),
   5014	EVENT_PTR(el_capacity),
   5015	EVENT_PTR(el_conflict),
   5016	EVENT_PTR(cycles_t),
   5017	EVENT_PTR(cycles_ct),
   5018	NULL
   5019};
   5020
   5021EVENT_ATTR_STR(tx-capacity-read,  tx_capacity_read,  "event=0x54,umask=0x80");
   5022EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
   5023EVENT_ATTR_STR(el-capacity-read,  el_capacity_read,  "event=0x54,umask=0x80");
   5024EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
   5025
   5026static struct attribute *icl_events_attrs[] = {
   5027	EVENT_PTR(mem_ld_hsw),
   5028	EVENT_PTR(mem_st_hsw),
   5029	NULL,
   5030};
   5031
   5032static struct attribute *icl_td_events_attrs[] = {
   5033	EVENT_PTR(slots),
   5034	EVENT_PTR(td_retiring),
   5035	EVENT_PTR(td_bad_spec),
   5036	EVENT_PTR(td_fe_bound),
   5037	EVENT_PTR(td_be_bound),
   5038	NULL,
   5039};
   5040
   5041static struct attribute *icl_tsx_events_attrs[] = {
   5042	EVENT_PTR(tx_start),
   5043	EVENT_PTR(tx_abort),
   5044	EVENT_PTR(tx_commit),
   5045	EVENT_PTR(tx_capacity_read),
   5046	EVENT_PTR(tx_capacity_write),
   5047	EVENT_PTR(tx_conflict),
   5048	EVENT_PTR(el_start),
   5049	EVENT_PTR(el_abort),
   5050	EVENT_PTR(el_commit),
   5051	EVENT_PTR(el_capacity_read),
   5052	EVENT_PTR(el_capacity_write),
   5053	EVENT_PTR(el_conflict),
   5054	EVENT_PTR(cycles_t),
   5055	EVENT_PTR(cycles_ct),
   5056	NULL,
   5057};
   5058
   5059
   5060EVENT_ATTR_STR(mem-stores,	mem_st_spr,	"event=0xcd,umask=0x2");
   5061EVENT_ATTR_STR(mem-loads-aux,	mem_ld_aux,	"event=0x03,umask=0x82");
   5062
   5063static struct attribute *spr_events_attrs[] = {
   5064	EVENT_PTR(mem_ld_hsw),
   5065	EVENT_PTR(mem_st_spr),
   5066	EVENT_PTR(mem_ld_aux),
   5067	NULL,
   5068};
   5069
   5070static struct attribute *spr_td_events_attrs[] = {
   5071	EVENT_PTR(slots),
   5072	EVENT_PTR(td_retiring),
   5073	EVENT_PTR(td_bad_spec),
   5074	EVENT_PTR(td_fe_bound),
   5075	EVENT_PTR(td_be_bound),
   5076	EVENT_PTR(td_heavy_ops),
   5077	EVENT_PTR(td_br_mispredict),
   5078	EVENT_PTR(td_fetch_lat),
   5079	EVENT_PTR(td_mem_bound),
   5080	NULL,
   5081};
   5082
   5083static struct attribute *spr_tsx_events_attrs[] = {
   5084	EVENT_PTR(tx_start),
   5085	EVENT_PTR(tx_abort),
   5086	EVENT_PTR(tx_commit),
   5087	EVENT_PTR(tx_capacity_read),
   5088	EVENT_PTR(tx_capacity_write),
   5089	EVENT_PTR(tx_conflict),
   5090	EVENT_PTR(cycles_t),
   5091	EVENT_PTR(cycles_ct),
   5092	NULL,
   5093};
   5094
   5095static ssize_t freeze_on_smi_show(struct device *cdev,
   5096				  struct device_attribute *attr,
   5097				  char *buf)
   5098{
   5099	return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
   5100}
   5101
   5102static DEFINE_MUTEX(freeze_on_smi_mutex);
   5103
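        /*
         * sysfs store for freeze_on_smi: accept 0 or 1 and, on a change,
         * propagate the new setting to every online CPU via flip_smm_bit().
         */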
   5104static ssize_t freeze_on_smi_store(struct device *cdev,
   5105				   struct device_attribute *attr,
   5106				   const char *buf, size_t count)
   5107{
   5108	unsigned long val;
   5109	ssize_t ret;
   5110
   5111	ret = kstrtoul(buf, 0, &val);
   5112	if (ret)
   5113		return ret;
   5114
   5115	if (val > 1)
   5116		return -EINVAL;
   5117
   5118	mutex_lock(&freeze_on_smi_mutex);
   5119
   5120	if (x86_pmu.attr_freeze_on_smi == val)
   5121		goto done;
   5122
   5123	x86_pmu.attr_freeze_on_smi = val;
   5124
   5125	cpus_read_lock();
   5126	on_each_cpu(flip_smm_bit, &val, 1);
   5127	cpus_read_unlock();
   5128done:
   5129	mutex_unlock(&freeze_on_smi_mutex);
   5130
   5131	return count;
   5132}
   5133
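        /*
         * Runs on each CPU (via on_each_cpu() from set_sysctl_tfa()) whenever
         * allow_tsx_force_abort is toggled.
         */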
   5134static void update_tfa_sched(void *ignored)
   5135{
   5136	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   5137
   5138	/*
    5139	 * Check if PMC3 is used and, if so, force a reschedule of all
    5140	 * event types in all contexts.
   5141	 */
   5142	if (test_bit(3, cpuc->active_mask))
   5143		perf_pmu_resched(x86_get_pmu(smp_processor_id()));
   5144}
   5145
   5146static ssize_t show_sysctl_tfa(struct device *cdev,
   5147			      struct device_attribute *attr,
   5148			      char *buf)
   5149{
   5150	return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
   5151}
   5152
   5153static ssize_t set_sysctl_tfa(struct device *cdev,
   5154			      struct device_attribute *attr,
   5155			      const char *buf, size_t count)
   5156{
   5157	bool val;
   5158	ssize_t ret;
   5159
   5160	ret = kstrtobool(buf, &val);
   5161	if (ret)
   5162		return ret;
   5163
   5164	/* no change */
   5165	if (val == allow_tsx_force_abort)
   5166		return count;
   5167
   5168	allow_tsx_force_abort = val;
   5169
   5170	cpus_read_lock();
   5171	on_each_cpu(update_tfa_sched, NULL, 1);
   5172	cpus_read_unlock();
   5173
   5174	return count;
   5175}
   5176
   5177
   5178static DEVICE_ATTR_RW(freeze_on_smi);
   5179
   5180static ssize_t branches_show(struct device *cdev,
   5181			     struct device_attribute *attr,
   5182			     char *buf)
   5183{
   5184	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
   5185}
   5186
   5187static DEVICE_ATTR_RO(branches);
   5188
   5189static struct attribute *lbr_attrs[] = {
   5190	&dev_attr_branches.attr,
   5191	NULL
   5192};
   5193
   5194static char pmu_name_str[30];
   5195
   5196static ssize_t pmu_name_show(struct device *cdev,
   5197			     struct device_attribute *attr,
   5198			     char *buf)
   5199{
   5200	return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
   5201}
   5202
   5203static DEVICE_ATTR_RO(pmu_name);
   5204
   5205static struct attribute *intel_pmu_caps_attrs[] = {
    5206	&dev_attr_pmu_name.attr,
    5207	NULL
   5208};
   5209
   5210static DEVICE_ATTR(allow_tsx_force_abort, 0644,
   5211		   show_sysctl_tfa,
   5212		   set_sysctl_tfa);
   5213
   5214static struct attribute *intel_pmu_attrs[] = {
   5215	&dev_attr_freeze_on_smi.attr,
   5216	&dev_attr_allow_tsx_force_abort.attr,
   5217	NULL,
   5218};
   5219
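        /*
         * sysfs .is_visible callbacks: hide attribute groups whose underlying
         * feature (RTM, PEBS, LBR, arch perfmon v2+) is not available.
         */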
   5220static umode_t
   5221tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
   5222{
   5223	return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0;
   5224}
   5225
   5226static umode_t
   5227pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
   5228{
   5229	return x86_pmu.pebs ? attr->mode : 0;
   5230}
   5231
   5232static umode_t
   5233lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
   5234{
   5235	return x86_pmu.lbr_nr ? attr->mode : 0;
   5236}
   5237
   5238static umode_t
   5239exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
   5240{
   5241	return x86_pmu.version >= 2 ? attr->mode : 0;
   5242}
   5243
   5244static umode_t
   5245default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
   5246{
   5247	if (attr == &dev_attr_allow_tsx_force_abort.attr)
   5248		return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
   5249
   5250	return attr->mode;
   5251}
   5252
   5253static struct attribute_group group_events_td  = {
   5254	.name = "events",
   5255};
   5256
   5257static struct attribute_group group_events_mem = {
   5258	.name       = "events",
   5259	.is_visible = pebs_is_visible,
   5260};
   5261
   5262static struct attribute_group group_events_tsx = {
   5263	.name       = "events",
   5264	.is_visible = tsx_is_visible,
   5265};
   5266
   5267static struct attribute_group group_caps_gen = {
   5268	.name  = "caps",
   5269	.attrs = intel_pmu_caps_attrs,
   5270};
   5271
   5272static struct attribute_group group_caps_lbr = {
   5273	.name       = "caps",
   5274	.attrs	    = lbr_attrs,
   5275	.is_visible = lbr_is_visible,
   5276};
   5277
   5278static struct attribute_group group_format_extra = {
   5279	.name       = "format",
   5280	.is_visible = exra_is_visible,
   5281};
   5282
   5283static struct attribute_group group_format_extra_skl = {
   5284	.name       = "format",
   5285	.is_visible = exra_is_visible,
   5286};
   5287
   5288static struct attribute_group group_default = {
   5289	.attrs      = intel_pmu_attrs,
   5290	.is_visible = default_is_visible,
   5291};
   5292
   5293static const struct attribute_group *attr_update[] = {
   5294	&group_events_td,
   5295	&group_events_mem,
   5296	&group_events_tsx,
   5297	&group_caps_gen,
   5298	&group_caps_lbr,
   5299	&group_format_extra,
   5300	&group_format_extra_skl,
   5301	&group_default,
   5302	NULL,
   5303};
   5304
   5305EVENT_ATTR_STR_HYBRID(slots,                 slots_adl,        "event=0x00,umask=0x4",                       hybrid_big);
   5306EVENT_ATTR_STR_HYBRID(topdown-retiring,      td_retiring_adl,  "event=0xc2,umask=0x0;event=0x00,umask=0x80", hybrid_big_small);
   5307EVENT_ATTR_STR_HYBRID(topdown-bad-spec,      td_bad_spec_adl,  "event=0x73,umask=0x0;event=0x00,umask=0x81", hybrid_big_small);
   5308EVENT_ATTR_STR_HYBRID(topdown-fe-bound,      td_fe_bound_adl,  "event=0x71,umask=0x0;event=0x00,umask=0x82", hybrid_big_small);
   5309EVENT_ATTR_STR_HYBRID(topdown-be-bound,      td_be_bound_adl,  "event=0x74,umask=0x0;event=0x00,umask=0x83", hybrid_big_small);
   5310EVENT_ATTR_STR_HYBRID(topdown-heavy-ops,     td_heavy_ops_adl, "event=0x00,umask=0x84",                      hybrid_big);
   5311EVENT_ATTR_STR_HYBRID(topdown-br-mispredict, td_br_mis_adl,    "event=0x00,umask=0x85",                      hybrid_big);
   5312EVENT_ATTR_STR_HYBRID(topdown-fetch-lat,     td_fetch_lat_adl, "event=0x00,umask=0x86",                      hybrid_big);
   5313EVENT_ATTR_STR_HYBRID(topdown-mem-bound,     td_mem_bound_adl, "event=0x00,umask=0x87",                      hybrid_big);
   5314
   5315static struct attribute *adl_hybrid_events_attrs[] = {
   5316	EVENT_PTR(slots_adl),
   5317	EVENT_PTR(td_retiring_adl),
   5318	EVENT_PTR(td_bad_spec_adl),
   5319	EVENT_PTR(td_fe_bound_adl),
   5320	EVENT_PTR(td_be_bound_adl),
   5321	EVENT_PTR(td_heavy_ops_adl),
   5322	EVENT_PTR(td_br_mis_adl),
   5323	EVENT_PTR(td_fetch_lat_adl),
   5324	EVENT_PTR(td_mem_bound_adl),
   5325	NULL,
   5326};
   5327
   5328/* Must be in IDX order */
   5329EVENT_ATTR_STR_HYBRID(mem-loads,     mem_ld_adl,     "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
   5330EVENT_ATTR_STR_HYBRID(mem-stores,    mem_st_adl,     "event=0xd0,umask=0x6;event=0xcd,umask=0x2",                 hybrid_big_small);
   5331EVENT_ATTR_STR_HYBRID(mem-loads-aux, mem_ld_aux_adl, "event=0x03,umask=0x82",                                     hybrid_big);
   5332
   5333static struct attribute *adl_hybrid_mem_attrs[] = {
   5334	EVENT_PTR(mem_ld_adl),
   5335	EVENT_PTR(mem_st_adl),
   5336	EVENT_PTR(mem_ld_aux_adl),
   5337	NULL,
   5338};
   5339
   5340EVENT_ATTR_STR_HYBRID(tx-start,          tx_start_adl,          "event=0xc9,umask=0x1",          hybrid_big);
   5341EVENT_ATTR_STR_HYBRID(tx-commit,         tx_commit_adl,         "event=0xc9,umask=0x2",          hybrid_big);
   5342EVENT_ATTR_STR_HYBRID(tx-abort,          tx_abort_adl,          "event=0xc9,umask=0x4",          hybrid_big);
   5343EVENT_ATTR_STR_HYBRID(tx-conflict,       tx_conflict_adl,       "event=0x54,umask=0x1",          hybrid_big);
   5344EVENT_ATTR_STR_HYBRID(cycles-t,          cycles_t_adl,          "event=0x3c,in_tx=1",            hybrid_big);
   5345EVENT_ATTR_STR_HYBRID(cycles-ct,         cycles_ct_adl,         "event=0x3c,in_tx=1,in_tx_cp=1", hybrid_big);
   5346EVENT_ATTR_STR_HYBRID(tx-capacity-read,  tx_capacity_read_adl,  "event=0x54,umask=0x80",         hybrid_big);
   5347EVENT_ATTR_STR_HYBRID(tx-capacity-write, tx_capacity_write_adl, "event=0x54,umask=0x2",          hybrid_big);
   5348
   5349static struct attribute *adl_hybrid_tsx_attrs[] = {
   5350	EVENT_PTR(tx_start_adl),
   5351	EVENT_PTR(tx_abort_adl),
   5352	EVENT_PTR(tx_commit_adl),
   5353	EVENT_PTR(tx_capacity_read_adl),
   5354	EVENT_PTR(tx_capacity_write_adl),
   5355	EVENT_PTR(tx_conflict_adl),
   5356	EVENT_PTR(cycles_t_adl),
   5357	EVENT_PTR(cycles_ct_adl),
   5358	NULL,
   5359};
   5360
   5361FORMAT_ATTR_HYBRID(in_tx,       hybrid_big);
   5362FORMAT_ATTR_HYBRID(in_tx_cp,    hybrid_big);
   5363FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
   5364FORMAT_ATTR_HYBRID(ldlat,       hybrid_big_small);
   5365FORMAT_ATTR_HYBRID(frontend,    hybrid_big);
   5366
   5367static struct attribute *adl_hybrid_extra_attr_rtm[] = {
   5368	FORMAT_HYBRID_PTR(in_tx),
   5369	FORMAT_HYBRID_PTR(in_tx_cp),
   5370	FORMAT_HYBRID_PTR(offcore_rsp),
   5371	FORMAT_HYBRID_PTR(ldlat),
   5372	FORMAT_HYBRID_PTR(frontend),
   5373	NULL,
   5374};
   5375
   5376static struct attribute *adl_hybrid_extra_attr[] = {
   5377	FORMAT_HYBRID_PTR(offcore_rsp),
   5378	FORMAT_HYBRID_PTR(ldlat),
   5379	FORMAT_HYBRID_PTR(frontend),
   5380	NULL,
   5381};
   5382
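        /* Only expose a hybrid event attribute on the PMU type(s) it was declared for. */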
   5383static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
   5384{
   5385	struct device *dev = kobj_to_dev(kobj);
   5386	struct x86_hybrid_pmu *pmu =
   5387		container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
   5388	struct perf_pmu_events_hybrid_attr *pmu_attr =
   5389		container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr);
   5390
   5391	return pmu->cpu_type & pmu_attr->pmu_type;
   5392}
   5393
   5394static umode_t hybrid_events_is_visible(struct kobject *kobj,
   5395					struct attribute *attr, int i)
   5396{
   5397	return is_attr_for_this_pmu(kobj, attr) ? attr->mode : 0;
   5398}
   5399
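        /* Return the first CPU in this hybrid PMU's supported mask, or -1 if the mask is empty. */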
   5400static inline int hybrid_find_supported_cpu(struct x86_hybrid_pmu *pmu)
   5401{
   5402	int cpu = cpumask_first(&pmu->supported_cpus);
   5403
   5404	return (cpu >= nr_cpu_ids) ? -1 : cpu;
   5405}
   5406
   5407static umode_t hybrid_tsx_is_visible(struct kobject *kobj,
   5408				     struct attribute *attr, int i)
   5409{
   5410	struct device *dev = kobj_to_dev(kobj);
   5411	struct x86_hybrid_pmu *pmu =
   5412		 container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
   5413	int cpu = hybrid_find_supported_cpu(pmu);
   5414
   5415	return (cpu >= 0) && is_attr_for_this_pmu(kobj, attr) && cpu_has(&cpu_data(cpu), X86_FEATURE_RTM) ? attr->mode : 0;
   5416}
   5417
   5418static umode_t hybrid_format_is_visible(struct kobject *kobj,
   5419					struct attribute *attr, int i)
   5420{
   5421	struct device *dev = kobj_to_dev(kobj);
   5422	struct x86_hybrid_pmu *pmu =
   5423		container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
   5424	struct perf_pmu_format_hybrid_attr *pmu_attr =
   5425		container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr);
   5426	int cpu = hybrid_find_supported_cpu(pmu);
   5427
   5428	return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0;
   5429}
   5430
   5431static struct attribute_group hybrid_group_events_td  = {
   5432	.name		= "events",
   5433	.is_visible	= hybrid_events_is_visible,
   5434};
   5435
   5436static struct attribute_group hybrid_group_events_mem = {
   5437	.name		= "events",
   5438	.is_visible	= hybrid_events_is_visible,
   5439};
   5440
   5441static struct attribute_group hybrid_group_events_tsx = {
   5442	.name		= "events",
   5443	.is_visible	= hybrid_tsx_is_visible,
   5444};
   5445
   5446static struct attribute_group hybrid_group_format_extra = {
   5447	.name		= "format",
   5448	.is_visible	= hybrid_format_is_visible,
   5449};
   5450
   5451static ssize_t intel_hybrid_get_attr_cpus(struct device *dev,
   5452					  struct device_attribute *attr,
   5453					  char *buf)
   5454{
   5455	struct x86_hybrid_pmu *pmu =
   5456		container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu);
   5457
   5458	return cpumap_print_to_pagebuf(true, buf, &pmu->supported_cpus);
   5459}
   5460
   5461static DEVICE_ATTR(cpus, S_IRUGO, intel_hybrid_get_attr_cpus, NULL);
   5462static struct attribute *intel_hybrid_cpus_attrs[] = {
   5463	&dev_attr_cpus.attr,
   5464	NULL,
   5465};
   5466
   5467static struct attribute_group hybrid_group_cpus = {
   5468	.attrs		= intel_hybrid_cpus_attrs,
   5469};
   5470
   5471static const struct attribute_group *hybrid_attr_update[] = {
   5472	&hybrid_group_events_td,
   5473	&hybrid_group_events_mem,
   5474	&hybrid_group_events_tsx,
   5475	&group_caps_gen,
   5476	&group_caps_lbr,
   5477	&hybrid_group_format_extra,
   5478	&group_default,
   5479	&hybrid_group_cpus,
   5480	NULL,
   5481};
   5482
   5483static struct attribute *empty_attrs;
   5484
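        /*
         * Clamp the generic and fixed counter counts to their compile-time
         * maxima and derive the matching enable-bit mask in *intel_ctrl.
         */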
   5485static void intel_pmu_check_num_counters(int *num_counters,
   5486					 int *num_counters_fixed,
   5487					 u64 *intel_ctrl, u64 fixed_mask)
   5488{
   5489	if (*num_counters > INTEL_PMC_MAX_GENERIC) {
   5490		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
   5491		     *num_counters, INTEL_PMC_MAX_GENERIC);
   5492		*num_counters = INTEL_PMC_MAX_GENERIC;
   5493	}
   5494	*intel_ctrl = (1ULL << *num_counters) - 1;
   5495
   5496	if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
   5497		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
   5498		     *num_counters_fixed, INTEL_PMC_MAX_FIXED);
   5499		*num_counters_fixed = INTEL_PMC_MAX_FIXED;
   5500	}
   5501
   5502	*intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
   5503}
   5504
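        /*
         * Adjust the static constraint tables to the counters actually
         * enumerated by CPUID: mask off missing fixed counters and
         * recompute the constraint weights.
         */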
   5505static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
   5506					      int num_counters,
   5507					      int num_counters_fixed,
   5508					      u64 intel_ctrl)
   5509{
   5510	struct event_constraint *c;
   5511
   5512	if (!event_constraints)
   5513		return;
   5514
   5515	/*
    5516	 * The event on fixed counter 2 (REF_CYCLES) only works on this
    5517	 * counter, so do not extend its mask to the generic counters.
   5518	 */
   5519	for_each_event_constraint(c, event_constraints) {
   5520		/*
   5521		 * Don't extend the topdown slots and metrics
   5522		 * events to the generic counters.
   5523		 */
   5524		if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
   5525			/*
    5526			 * Disable the topdown slots and metrics events
    5527			 * if the slots event is not enumerated in CPUID.
   5528			 */
   5529			if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl))
   5530				c->idxmsk64 = 0;
   5531			c->weight = hweight64(c->idxmsk64);
   5532			continue;
   5533		}
   5534
   5535		if (c->cmask == FIXED_EVENT_FLAGS) {
    5536			/* Disable fixed counters which are not enumerated in CPUID */
   5537			c->idxmsk64 &= intel_ctrl;
   5538
   5539			/*
   5540			 * Don't extend the pseudo-encoding to the
   5541			 * generic counters
   5542			 */
   5543			if (!use_fixed_pseudo_encoding(c->code))
   5544				c->idxmsk64 |= (1ULL << num_counters) - 1;
   5545		}
   5546		c->idxmsk64 &=
   5547			~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
   5548		c->weight = hweight64(c->idxmsk64);
   5549	}
   5550}
   5551
   5552static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
   5553{
   5554	struct extra_reg *er;
   5555
   5556	/*
    5557	 * Accessing an extra MSR may cause a #GP under certain circumstances,
    5558	 * e.g. KVM doesn't support offcore events.
    5559	 * Check all extra_regs here.
   5560	 */
   5561	if (!extra_regs)
   5562		return;
   5563
   5564	for (er = extra_regs; er->msr; er++) {
   5565		er->extra_msr_access = check_msr(er->msr, 0x11UL);
   5566		/* Disable LBR select mapping */
   5567		if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
   5568			x86_pmu.lbr_sel_map = NULL;
   5569	}
   5570}
   5571
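        /*
         * Apply the counter, constraint and extra_regs sanity checks above to
         * each hybrid PMU and set up its per-PMU capabilities.
         */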
   5572static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
   5573{
   5574	struct x86_hybrid_pmu *pmu;
   5575	int i;
   5576
   5577	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
   5578		pmu = &x86_pmu.hybrid_pmu[i];
   5579
   5580		intel_pmu_check_num_counters(&pmu->num_counters,
   5581					     &pmu->num_counters_fixed,
   5582					     &pmu->intel_ctrl,
   5583					     fixed_mask);
   5584
   5585		if (pmu->intel_cap.perf_metrics) {
   5586			pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
   5587			pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS;
   5588		}
   5589
   5590		if (pmu->intel_cap.pebs_output_pt_available)
   5591			pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
   5592
   5593		intel_pmu_check_event_constraints(pmu->event_constraints,
   5594						  pmu->num_counters,
   5595						  pmu->num_counters_fixed,
   5596						  pmu->intel_ctrl);
   5597
   5598		intel_pmu_check_extra_regs(pmu->extra_regs);
   5599	}
   5600}
   5601
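        /*
         * Top-level Intel PMU probe: read CPUID leaf 0xA, then select the
         * model-specific event tables, constraints and quirks below.
         */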
   5602__init int intel_pmu_init(void)
   5603{
   5604	struct attribute **extra_skl_attr = &empty_attrs;
   5605	struct attribute **extra_attr = &empty_attrs;
   5606	struct attribute **td_attr    = &empty_attrs;
   5607	struct attribute **mem_attr   = &empty_attrs;
   5608	struct attribute **tsx_attr   = &empty_attrs;
   5609	union cpuid10_edx edx;
   5610	union cpuid10_eax eax;
   5611	union cpuid10_ebx ebx;
   5612	unsigned int fixed_mask;
   5613	bool pmem = false;
   5614	int version, i;
   5615	char *name;
   5616	struct x86_hybrid_pmu *pmu;
   5617
   5618	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
   5619		switch (boot_cpu_data.x86) {
   5620		case 0x6:
   5621			return p6_pmu_init();
   5622		case 0xb:
   5623			return knc_pmu_init();
   5624		case 0xf:
   5625			return p4_pmu_init();
   5626		}
   5627		return -ENODEV;
   5628	}
   5629
   5630	/*
    5631	 * Check whether the Architectural PerfMon supports
    5632	 * the Branch Misses Retired hw_event.
   5633	 */
   5634	cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
   5635	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
   5636		return -ENODEV;
   5637
   5638	version = eax.split.version_id;
   5639	if (version < 2)
   5640		x86_pmu = core_pmu;
   5641	else
   5642		x86_pmu = intel_pmu;
   5643
   5644	x86_pmu.version			= version;
   5645	x86_pmu.num_counters		= eax.split.num_counters;
   5646	x86_pmu.cntval_bits		= eax.split.bit_width;
   5647	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
   5648
   5649	x86_pmu.events_maskl		= ebx.full;
   5650	x86_pmu.events_mask_len		= eax.split.mask_length;
   5651
   5652	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
   5653
   5654	/*
   5655	 * Quirk: v2 perfmon does not report fixed-purpose events, so
   5656	 * assume at least 3 events, when not running in a hypervisor:
   5657	 */
   5658	if (version > 1 && version < 5) {
   5659		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
   5660
   5661		x86_pmu.num_counters_fixed =
   5662			max((int)edx.split.num_counters_fixed, assume);
   5663
   5664		fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
   5665	} else if (version >= 5)
   5666		x86_pmu.num_counters_fixed = fls(fixed_mask);
   5667
   5668	if (boot_cpu_has(X86_FEATURE_PDCM)) {
   5669		u64 capabilities;
   5670
   5671		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
   5672		x86_pmu.intel_cap.capabilities = capabilities;
   5673	}
   5674
   5675	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
   5676		x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
   5677		x86_pmu.lbr_read = intel_pmu_lbr_read_32;
   5678	}
   5679
   5680	if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
   5681		intel_pmu_arch_lbr_init();
   5682
   5683	intel_ds_init();
   5684
   5685	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
   5686
   5687	if (version >= 5) {
   5688		x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated;
   5689		if (x86_pmu.intel_cap.anythread_deprecated)
   5690			pr_cont(" AnyThread deprecated, ");
   5691	}
   5692
   5693	/*
   5694	 * Install the hw-cache-events table:
   5695	 */
   5696	switch (boot_cpu_data.x86_model) {
   5697	case INTEL_FAM6_CORE_YONAH:
   5698		pr_cont("Core events, ");
   5699		name = "core";
   5700		break;
   5701
   5702	case INTEL_FAM6_CORE2_MEROM:
   5703		x86_add_quirk(intel_clovertown_quirk);
   5704		fallthrough;
   5705
   5706	case INTEL_FAM6_CORE2_MEROM_L:
   5707	case INTEL_FAM6_CORE2_PENRYN:
   5708	case INTEL_FAM6_CORE2_DUNNINGTON:
   5709		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
   5710		       sizeof(hw_cache_event_ids));
   5711
   5712		intel_pmu_lbr_init_core();
   5713
   5714		x86_pmu.event_constraints = intel_core2_event_constraints;
   5715		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
   5716		pr_cont("Core2 events, ");
   5717		name = "core2";
   5718		break;
   5719
   5720	case INTEL_FAM6_NEHALEM:
   5721	case INTEL_FAM6_NEHALEM_EP:
   5722	case INTEL_FAM6_NEHALEM_EX:
   5723		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
   5724		       sizeof(hw_cache_event_ids));
   5725		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
   5726		       sizeof(hw_cache_extra_regs));
   5727
   5728		intel_pmu_lbr_init_nhm();
   5729
   5730		x86_pmu.event_constraints = intel_nehalem_event_constraints;
   5731		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
   5732		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
   5733		x86_pmu.extra_regs = intel_nehalem_extra_regs;
   5734		x86_pmu.limit_period = nhm_limit_period;
   5735
   5736		mem_attr = nhm_mem_events_attrs;
   5737
   5738		/* UOPS_ISSUED.STALLED_CYCLES */
   5739		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
   5740			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
   5741		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
   5742		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
   5743			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
   5744
   5745		intel_pmu_pebs_data_source_nhm();
   5746		x86_add_quirk(intel_nehalem_quirk);
   5747		x86_pmu.pebs_no_tlb = 1;
   5748		extra_attr = nhm_format_attr;
   5749
   5750		pr_cont("Nehalem events, ");
   5751		name = "nehalem";
   5752		break;
   5753
   5754	case INTEL_FAM6_ATOM_BONNELL:
   5755	case INTEL_FAM6_ATOM_BONNELL_MID:
   5756	case INTEL_FAM6_ATOM_SALTWELL:
   5757	case INTEL_FAM6_ATOM_SALTWELL_MID:
   5758	case INTEL_FAM6_ATOM_SALTWELL_TABLET:
   5759		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
   5760		       sizeof(hw_cache_event_ids));
   5761
   5762		intel_pmu_lbr_init_atom();
   5763
   5764		x86_pmu.event_constraints = intel_gen_event_constraints;
   5765		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
   5766		x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
   5767		pr_cont("Atom events, ");
   5768		name = "bonnell";
   5769		break;
   5770
   5771	case INTEL_FAM6_ATOM_SILVERMONT:
   5772	case INTEL_FAM6_ATOM_SILVERMONT_D:
   5773	case INTEL_FAM6_ATOM_SILVERMONT_MID:
   5774	case INTEL_FAM6_ATOM_AIRMONT:
   5775	case INTEL_FAM6_ATOM_AIRMONT_MID:
   5776		memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
   5777			sizeof(hw_cache_event_ids));
   5778		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
   5779		       sizeof(hw_cache_extra_regs));
   5780
   5781		intel_pmu_lbr_init_slm();
   5782
   5783		x86_pmu.event_constraints = intel_slm_event_constraints;
   5784		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
   5785		x86_pmu.extra_regs = intel_slm_extra_regs;
   5786		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5787		td_attr = slm_events_attrs;
   5788		extra_attr = slm_format_attr;
   5789		pr_cont("Silvermont events, ");
   5790		name = "silvermont";
   5791		break;
   5792
   5793	case INTEL_FAM6_ATOM_GOLDMONT:
   5794	case INTEL_FAM6_ATOM_GOLDMONT_D:
   5795		memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
   5796		       sizeof(hw_cache_event_ids));
   5797		memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
   5798		       sizeof(hw_cache_extra_regs));
   5799
   5800		intel_pmu_lbr_init_skl();
   5801
   5802		x86_pmu.event_constraints = intel_slm_event_constraints;
   5803		x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
   5804		x86_pmu.extra_regs = intel_glm_extra_regs;
   5805		/*
   5806		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
   5807		 * for precise cycles.
   5808		 * :pp is identical to :ppp
   5809		 */
   5810		x86_pmu.pebs_aliases = NULL;
   5811		x86_pmu.pebs_prec_dist = true;
   5812		x86_pmu.lbr_pt_coexist = true;
   5813		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5814		td_attr = glm_events_attrs;
   5815		extra_attr = slm_format_attr;
   5816		pr_cont("Goldmont events, ");
   5817		name = "goldmont";
   5818		break;
   5819
   5820	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
   5821		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
   5822		       sizeof(hw_cache_event_ids));
   5823		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
   5824		       sizeof(hw_cache_extra_regs));
   5825
   5826		intel_pmu_lbr_init_skl();
   5827
   5828		x86_pmu.event_constraints = intel_slm_event_constraints;
   5829		x86_pmu.extra_regs = intel_glm_extra_regs;
   5830		/*
   5831		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
   5832		 * for precise cycles.
   5833		 */
   5834		x86_pmu.pebs_aliases = NULL;
   5835		x86_pmu.pebs_prec_dist = true;
   5836		x86_pmu.lbr_pt_coexist = true;
   5837		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5838		x86_pmu.flags |= PMU_FL_PEBS_ALL;
   5839		x86_pmu.get_event_constraints = glp_get_event_constraints;
   5840		td_attr = glm_events_attrs;
   5841		/* Goldmont Plus has 4-wide pipeline */
   5842		event_attr_td_total_slots_scale_glm.event_str = "4";
   5843		extra_attr = slm_format_attr;
   5844		pr_cont("Goldmont plus events, ");
   5845		name = "goldmont_plus";
   5846		break;
   5847
   5848	case INTEL_FAM6_ATOM_TREMONT_D:
   5849	case INTEL_FAM6_ATOM_TREMONT:
   5850	case INTEL_FAM6_ATOM_TREMONT_L:
   5851		x86_pmu.late_ack = true;
   5852		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
   5853		       sizeof(hw_cache_event_ids));
   5854		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
   5855		       sizeof(hw_cache_extra_regs));
   5856		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
   5857
   5858		intel_pmu_lbr_init_skl();
   5859
   5860		x86_pmu.event_constraints = intel_slm_event_constraints;
   5861		x86_pmu.extra_regs = intel_tnt_extra_regs;
   5862		/*
   5863		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
   5864		 * for precise cycles.
   5865		 */
   5866		x86_pmu.pebs_aliases = NULL;
   5867		x86_pmu.pebs_prec_dist = true;
   5868		x86_pmu.lbr_pt_coexist = true;
   5869		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5870		x86_pmu.get_event_constraints = tnt_get_event_constraints;
   5871		td_attr = tnt_events_attrs;
   5872		extra_attr = slm_format_attr;
   5873		pr_cont("Tremont events, ");
   5874		name = "Tremont";
   5875		break;
   5876
   5877	case INTEL_FAM6_WESTMERE:
   5878	case INTEL_FAM6_WESTMERE_EP:
   5879	case INTEL_FAM6_WESTMERE_EX:
   5880		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
   5881		       sizeof(hw_cache_event_ids));
   5882		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
   5883		       sizeof(hw_cache_extra_regs));
   5884
   5885		intel_pmu_lbr_init_nhm();
   5886
   5887		x86_pmu.event_constraints = intel_westmere_event_constraints;
   5888		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
   5889		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
   5890		x86_pmu.extra_regs = intel_westmere_extra_regs;
   5891		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5892
   5893		mem_attr = nhm_mem_events_attrs;
   5894
   5895		/* UOPS_ISSUED.STALLED_CYCLES */
   5896		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
   5897			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
   5898		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
   5899		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
   5900			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
   5901
   5902		intel_pmu_pebs_data_source_nhm();
   5903		extra_attr = nhm_format_attr;
   5904		pr_cont("Westmere events, ");
   5905		name = "westmere";
   5906		break;
   5907
   5908	case INTEL_FAM6_SANDYBRIDGE:
   5909	case INTEL_FAM6_SANDYBRIDGE_X:
   5910		x86_add_quirk(intel_sandybridge_quirk);
   5911		x86_add_quirk(intel_ht_bug);
   5912		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
   5913		       sizeof(hw_cache_event_ids));
   5914		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
   5915		       sizeof(hw_cache_extra_regs));
   5916
   5917		intel_pmu_lbr_init_snb();
   5918
   5919		x86_pmu.event_constraints = intel_snb_event_constraints;
   5920		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
   5921		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
   5922		if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
   5923			x86_pmu.extra_regs = intel_snbep_extra_regs;
   5924		else
   5925			x86_pmu.extra_regs = intel_snb_extra_regs;
   5926
   5927
   5928		/* all extra regs are per-cpu when HT is on */
   5929		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5930		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   5931
   5932		td_attr  = snb_events_attrs;
   5933		mem_attr = snb_mem_events_attrs;
   5934
   5935		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
   5936		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
   5937			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
   5938		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
   5939		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
   5940			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
   5941
   5942		extra_attr = nhm_format_attr;
   5943
   5944		pr_cont("SandyBridge events, ");
   5945		name = "sandybridge";
   5946		break;
   5947
   5948	case INTEL_FAM6_IVYBRIDGE:
   5949	case INTEL_FAM6_IVYBRIDGE_X:
   5950		x86_add_quirk(intel_ht_bug);
   5951		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
   5952		       sizeof(hw_cache_event_ids));
    5953		/* dTLB-load-misses on IVB is different from SNB */
   5954		hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
   5955
   5956		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
   5957		       sizeof(hw_cache_extra_regs));
   5958
   5959		intel_pmu_lbr_init_snb();
   5960
   5961		x86_pmu.event_constraints = intel_ivb_event_constraints;
   5962		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
   5963		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
   5964		x86_pmu.pebs_prec_dist = true;
   5965		if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
   5966			x86_pmu.extra_regs = intel_snbep_extra_regs;
   5967		else
   5968			x86_pmu.extra_regs = intel_snb_extra_regs;
   5969		/* all extra regs are per-cpu when HT is on */
   5970		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   5971		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   5972
   5973		td_attr  = snb_events_attrs;
   5974		mem_attr = snb_mem_events_attrs;
   5975
   5976		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
   5977		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
   5978			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
   5979
   5980		extra_attr = nhm_format_attr;
   5981
   5982		pr_cont("IvyBridge events, ");
   5983		name = "ivybridge";
   5984		break;
   5985
   5986
   5987	case INTEL_FAM6_HASWELL:
   5988	case INTEL_FAM6_HASWELL_X:
   5989	case INTEL_FAM6_HASWELL_L:
   5990	case INTEL_FAM6_HASWELL_G:
   5991		x86_add_quirk(intel_ht_bug);
   5992		x86_add_quirk(intel_pebs_isolation_quirk);
   5993		x86_pmu.late_ack = true;
   5994		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   5995		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
   5996
   5997		intel_pmu_lbr_init_hsw();
   5998
   5999		x86_pmu.event_constraints = intel_hsw_event_constraints;
   6000		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
   6001		x86_pmu.extra_regs = intel_snbep_extra_regs;
   6002		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
   6003		x86_pmu.pebs_prec_dist = true;
   6004		/* all extra regs are per-cpu when HT is on */
   6005		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6006		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6007
   6008		x86_pmu.hw_config = hsw_hw_config;
   6009		x86_pmu.get_event_constraints = hsw_get_event_constraints;
   6010		x86_pmu.lbr_double_abort = true;
   6011		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
   6012			hsw_format_attr : nhm_format_attr;
   6013		td_attr  = hsw_events_attrs;
   6014		mem_attr = hsw_mem_events_attrs;
   6015		tsx_attr = hsw_tsx_events_attrs;
   6016		pr_cont("Haswell events, ");
   6017		name = "haswell";
   6018		break;
   6019
   6020	case INTEL_FAM6_BROADWELL:
   6021	case INTEL_FAM6_BROADWELL_D:
   6022	case INTEL_FAM6_BROADWELL_G:
   6023	case INTEL_FAM6_BROADWELL_X:
   6024		x86_add_quirk(intel_pebs_isolation_quirk);
   6025		x86_pmu.late_ack = true;
   6026		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   6027		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
   6028
   6029		/* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
   6030		hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
   6031									 BDW_L3_MISS|HSW_SNOOP_DRAM;
   6032		hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
   6033									  HSW_SNOOP_DRAM;
   6034		hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
   6035									     BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
   6036		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
   6037									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
   6038
   6039		intel_pmu_lbr_init_hsw();
   6040
   6041		x86_pmu.event_constraints = intel_bdw_event_constraints;
   6042		x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
   6043		x86_pmu.extra_regs = intel_snbep_extra_regs;
   6044		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
   6045		x86_pmu.pebs_prec_dist = true;
   6046		/* all extra regs are per-cpu when HT is on */
   6047		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6048		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6049
   6050		x86_pmu.hw_config = hsw_hw_config;
   6051		x86_pmu.get_event_constraints = hsw_get_event_constraints;
   6052		x86_pmu.limit_period = bdw_limit_period;
   6053		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
   6054			hsw_format_attr : nhm_format_attr;
   6055		td_attr  = hsw_events_attrs;
   6056		mem_attr = hsw_mem_events_attrs;
   6057		tsx_attr = hsw_tsx_events_attrs;
   6058		pr_cont("Broadwell events, ");
   6059		name = "broadwell";
   6060		break;
   6061
   6062	case INTEL_FAM6_XEON_PHI_KNL:
   6063	case INTEL_FAM6_XEON_PHI_KNM:
   6064		memcpy(hw_cache_event_ids,
   6065		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   6066		memcpy(hw_cache_extra_regs,
   6067		       knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
   6068		intel_pmu_lbr_init_knl();
   6069
   6070		x86_pmu.event_constraints = intel_slm_event_constraints;
   6071		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
   6072		x86_pmu.extra_regs = intel_knl_extra_regs;
   6073
   6074		/* all extra regs are per-cpu when HT is on */
   6075		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6076		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6077		extra_attr = slm_format_attr;
   6078		pr_cont("Knights Landing/Mill events, ");
   6079		name = "knights-landing";
   6080		break;
   6081
   6082	case INTEL_FAM6_SKYLAKE_X:
   6083		pmem = true;
   6084		fallthrough;
   6085	case INTEL_FAM6_SKYLAKE_L:
   6086	case INTEL_FAM6_SKYLAKE:
   6087	case INTEL_FAM6_KABYLAKE_L:
   6088	case INTEL_FAM6_KABYLAKE:
   6089	case INTEL_FAM6_COMETLAKE_L:
   6090	case INTEL_FAM6_COMETLAKE:
   6091		x86_add_quirk(intel_pebs_isolation_quirk);
   6092		x86_pmu.late_ack = true;
   6093		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   6094		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
   6095		intel_pmu_lbr_init_skl();
   6096
   6097		/* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
   6098		event_attr_td_recovery_bubbles.event_str_noht =
   6099			"event=0xd,umask=0x1,cmask=1";
   6100		event_attr_td_recovery_bubbles.event_str_ht =
   6101			"event=0xd,umask=0x1,cmask=1,any=1";
   6102
   6103		x86_pmu.event_constraints = intel_skl_event_constraints;
   6104		x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
   6105		x86_pmu.extra_regs = intel_skl_extra_regs;
   6106		x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
   6107		x86_pmu.pebs_prec_dist = true;
   6108		/* all extra regs are per-cpu when HT is on */
   6109		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6110		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6111
   6112		x86_pmu.hw_config = hsw_hw_config;
   6113		x86_pmu.get_event_constraints = hsw_get_event_constraints;
   6114		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
   6115			hsw_format_attr : nhm_format_attr;
   6116		extra_skl_attr = skl_format_attr;
   6117		td_attr  = hsw_events_attrs;
   6118		mem_attr = hsw_mem_events_attrs;
   6119		tsx_attr = hsw_tsx_events_attrs;
   6120		intel_pmu_pebs_data_source_skl(pmem);
   6121
   6122		/*
   6123		 * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default.
    6124		 * The TSX force abort hooks are not required on these systems. Only deploy
    6125		 * the workaround when the microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT.
   6126		 */
   6127		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) &&
   6128		   !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) {
   6129			x86_pmu.flags |= PMU_FL_TFA;
   6130			x86_pmu.get_event_constraints = tfa_get_event_constraints;
   6131			x86_pmu.enable_all = intel_tfa_pmu_enable_all;
   6132			x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
   6133		}
   6134
   6135		pr_cont("Skylake events, ");
   6136		name = "skylake";
   6137		break;
   6138
   6139	case INTEL_FAM6_ICELAKE_X:
   6140	case INTEL_FAM6_ICELAKE_D:
   6141		pmem = true;
   6142		fallthrough;
   6143	case INTEL_FAM6_ICELAKE_L:
   6144	case INTEL_FAM6_ICELAKE:
   6145	case INTEL_FAM6_TIGERLAKE_L:
   6146	case INTEL_FAM6_TIGERLAKE:
   6147	case INTEL_FAM6_ROCKETLAKE:
   6148		x86_pmu.late_ack = true;
   6149		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   6150		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
   6151		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
   6152		intel_pmu_lbr_init_skl();
   6153
   6154		x86_pmu.event_constraints = intel_icl_event_constraints;
   6155		x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
   6156		x86_pmu.extra_regs = intel_icl_extra_regs;
   6157		x86_pmu.pebs_aliases = NULL;
   6158		x86_pmu.pebs_prec_dist = true;
   6159		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6160		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6161
   6162		x86_pmu.hw_config = hsw_hw_config;
   6163		x86_pmu.get_event_constraints = icl_get_event_constraints;
   6164		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
   6165			hsw_format_attr : nhm_format_attr;
   6166		extra_skl_attr = skl_format_attr;
   6167		mem_attr = icl_events_attrs;
   6168		td_attr = icl_td_events_attrs;
   6169		tsx_attr = icl_tsx_events_attrs;
   6170		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
   6171		x86_pmu.lbr_pt_coexist = true;
   6172		intel_pmu_pebs_data_source_skl(pmem);
   6173		x86_pmu.num_topdown_events = 4;
   6174		x86_pmu.update_topdown_event = icl_update_topdown_event;
   6175		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
   6176		pr_cont("Icelake events, ");
   6177		name = "icelake";
   6178		break;
   6179
   6180	case INTEL_FAM6_SAPPHIRERAPIDS_X:
   6181		pmem = true;
   6182		x86_pmu.late_ack = true;
   6183		memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   6184		memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
   6185
   6186		x86_pmu.event_constraints = intel_spr_event_constraints;
   6187		x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
   6188		x86_pmu.extra_regs = intel_spr_extra_regs;
   6189		x86_pmu.limit_period = spr_limit_period;
   6190		x86_pmu.pebs_aliases = NULL;
   6191		x86_pmu.pebs_prec_dist = true;
   6192		x86_pmu.pebs_block = true;
   6193		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6194		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6195		x86_pmu.flags |= PMU_FL_PEBS_ALL;
   6196		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
   6197		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
   6198
   6199		x86_pmu.hw_config = hsw_hw_config;
   6200		x86_pmu.get_event_constraints = spr_get_event_constraints;
   6201		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
   6202			hsw_format_attr : nhm_format_attr;
   6203		extra_skl_attr = skl_format_attr;
   6204		mem_attr = spr_events_attrs;
   6205		td_attr = spr_td_events_attrs;
   6206		tsx_attr = spr_tsx_events_attrs;
   6207		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
   6208		x86_pmu.lbr_pt_coexist = true;
   6209		intel_pmu_pebs_data_source_skl(pmem);
   6210		x86_pmu.num_topdown_events = 8;
   6211		x86_pmu.update_topdown_event = icl_update_topdown_event;
   6212		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
   6213		pr_cont("Sapphire Rapids events, ");
   6214		name = "sapphire_rapids";
   6215		break;
   6216
   6217	case INTEL_FAM6_ALDERLAKE:
   6218	case INTEL_FAM6_ALDERLAKE_L:
   6219	case INTEL_FAM6_ALDERLAKE_N:
   6220	case INTEL_FAM6_RAPTORLAKE:
   6221	case INTEL_FAM6_RAPTORLAKE_P:
   6222		/*
    6223		 * Alder Lake has two types of CPUs: Core and Atom.
   6224		 *
   6225		 * Initialize the common PerfMon capabilities here.
   6226		 */
   6227		x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS,
   6228					     sizeof(struct x86_hybrid_pmu),
   6229					     GFP_KERNEL);
   6230		if (!x86_pmu.hybrid_pmu)
   6231			return -ENOMEM;
   6232		static_branch_enable(&perf_is_hybrid);
   6233		x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
   6234
   6235		x86_pmu.pebs_aliases = NULL;
   6236		x86_pmu.pebs_prec_dist = true;
   6237		x86_pmu.pebs_block = true;
   6238		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
   6239		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
   6240		x86_pmu.flags |= PMU_FL_PEBS_ALL;
   6241		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
   6242		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
   6243		x86_pmu.lbr_pt_coexist = true;
   6244		intel_pmu_pebs_data_source_skl(false);
   6245		x86_pmu.num_topdown_events = 8;
   6246		x86_pmu.update_topdown_event = adl_update_topdown_event;
   6247		x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
   6248
   6249		x86_pmu.filter_match = intel_pmu_filter_match;
   6250		x86_pmu.get_event_constraints = adl_get_event_constraints;
   6251		x86_pmu.hw_config = adl_hw_config;
   6252		x86_pmu.limit_period = spr_limit_period;
   6253		x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
   6254		/*
    6255		 * The rtm_abort_event is used to check whether to enable GPRs
    6256		 * for the RTM abort event. Atom doesn't have the RTM abort
    6257		 * event, and there is no harm in setting it in the common
    6258		 * x86_pmu.rtm_abort_event.
   6259		 */
   6260		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
   6261
   6262		td_attr = adl_hybrid_events_attrs;
   6263		mem_attr = adl_hybrid_mem_attrs;
   6264		tsx_attr = adl_hybrid_tsx_attrs;
   6265		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
   6266			adl_hybrid_extra_attr_rtm : adl_hybrid_extra_attr;
   6267
    6268		/* Initialize big core specific PerfMon capabilities. */
   6269		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
   6270		pmu->name = "cpu_core";
   6271		pmu->cpu_type = hybrid_big;
   6272		pmu->late_ack = true;
   6273		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
   6274			pmu->num_counters = x86_pmu.num_counters + 2;
   6275			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
   6276		} else {
   6277			pmu->num_counters = x86_pmu.num_counters;
   6278			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
   6279		}
   6280
   6281		/*
    6282		 * Quirk: On some Alder Lake machines, when all E-cores are disabled in
    6283		 * the BIOS, CPUID leaf 0xA enumerates all counters of the P-cores. However,
    6284		 * X86_FEATURE_HYBRID_CPU is still set. The code above would then
    6285		 * mistakenly add extra counters for the P-cores. Correct the number of
    6286		 * counters here.
   6287		 */
   6288		if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
   6289			pmu->num_counters = x86_pmu.num_counters;
   6290			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
   6291		}
   6292
   6293		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
   6294		pmu->unconstrained = (struct event_constraint)
   6295					__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
   6296							   0, pmu->num_counters, 0, 0);
   6297		pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
   6298		pmu->intel_cap.perf_metrics = 1;
   6299		pmu->intel_cap.pebs_output_pt_available = 0;
   6300
   6301		memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids));
   6302		memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs));
   6303		pmu->event_constraints = intel_spr_event_constraints;
   6304		pmu->pebs_constraints = intel_spr_pebs_event_constraints;
   6305		pmu->extra_regs = intel_spr_extra_regs;
   6306
    6307		/* Initialize Atom core specific PerfMon capabilities. */
   6308		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
   6309		pmu->name = "cpu_atom";
   6310		pmu->cpu_type = hybrid_small;
   6311		pmu->mid_ack = true;
   6312		pmu->num_counters = x86_pmu.num_counters;
   6313		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
   6314		pmu->max_pebs_events = x86_pmu.max_pebs_events;
   6315		pmu->unconstrained = (struct event_constraint)
   6316					__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
   6317							   0, pmu->num_counters, 0, 0);
   6318		pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
   6319		pmu->intel_cap.perf_metrics = 0;
   6320		pmu->intel_cap.pebs_output_pt_available = 1;
   6321
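        		/*
        		 * The Atom cores reuse the Goldmont Plus (glp) cache event ids,
        		 * the Tremont (tnt) cache extra regs, the Silvermont (slm) event
        		 * constraints and the Gracemont (grt) PEBS constraints/extra regs.
        		 */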
   6322		memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids));
   6323		memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs));
   6324		pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
   6325		pmu->event_constraints = intel_slm_event_constraints;
   6326		pmu->pebs_constraints = intel_grt_pebs_event_constraints;
   6327		pmu->extra_regs = intel_grt_extra_regs;
   6328		pr_cont("Alderlake Hybrid events, ");
   6329		name = "alderlake_hybrid";
   6330		break;
   6331
   6332	default:
   6333		switch (x86_pmu.version) {
   6334		case 1:
   6335			x86_pmu.event_constraints = intel_v1_event_constraints;
   6336			pr_cont("generic architected perfmon v1, ");
   6337			name = "generic_arch_v1";
   6338			break;
   6339		case 2:
   6340		case 3:
   6341		case 4:
   6342			/*
   6343			 * default constraints for v2 and up
   6344			 */
   6345			x86_pmu.event_constraints = intel_gen_event_constraints;
   6346			pr_cont("generic architected perfmon, ");
   6347			name = "generic_arch_v2+";
   6348			break;
   6349		default:
    6350			/*
    6351			 * The default constraints for v5 and up support up to
    6352			 * 16 fixed counters. For fixed counters 4 and later,
    6353			 * the pseudo-encoding is applied.
    6354			 * The constraint list is truncated to the CPUID-enumerated
    6355			 * number of fixed counters by inserting EVENT_CONSTRAINT_END.
    6356			 */
   6357			if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
   6358				x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
   6359			intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
   6360			x86_pmu.event_constraints = intel_v5_gen_event_constraints;
   6361			pr_cont("generic architected perfmon, ");
   6362			name = "generic_arch_v5+";
   6363			break;
   6364		}
   6365	}
   6366
   6367	snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
   6368
   6369	if (!is_hybrid()) {
   6370		group_events_td.attrs  = td_attr;
   6371		group_events_mem.attrs = mem_attr;
   6372		group_events_tsx.attrs = tsx_attr;
   6373		group_format_extra.attrs = extra_attr;
   6374		group_format_extra_skl.attrs = extra_skl_attr;
   6375
   6376		x86_pmu.attr_update = attr_update;
   6377	} else {
   6378		hybrid_group_events_td.attrs  = td_attr;
   6379		hybrid_group_events_mem.attrs = mem_attr;
   6380		hybrid_group_events_tsx.attrs = tsx_attr;
   6381		hybrid_group_format_extra.attrs = extra_attr;
   6382
   6383		x86_pmu.attr_update = hybrid_attr_update;
   6384	}
   6385
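        	/*
        	 * Clamp the generic/fixed counter counts to the architectural
        	 * maxima and derive the intel_ctrl global enable mask from them.
        	 */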
   6386	intel_pmu_check_num_counters(&x86_pmu.num_counters,
   6387				     &x86_pmu.num_counters_fixed,
   6388				     &x86_pmu.intel_ctrl,
   6389				     (u64)fixed_mask);
   6390
   6391	/* AnyThread may be deprecated on arch perfmon v5 or later */
   6392	if (x86_pmu.intel_cap.anythread_deprecated)
   6393		x86_pmu.format_attrs = intel_arch_formats_attr;
   6394
   6395	intel_pmu_check_event_constraints(x86_pmu.event_constraints,
   6396					  x86_pmu.num_counters,
   6397					  x86_pmu.num_counters_fixed,
   6398					  x86_pmu.intel_ctrl);
    6399	/*
    6400	 * Accessing the LBR MSRs may cause a #GP under certain circumstances,
    6401	 * e.g. KVM doesn't support the LBR MSRs.
    6402	 * Check all LBR MSRs here.
    6403	 * Disable LBR access if any LBR MSR cannot be accessed.
    6404	 */
   6405	if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
   6406		x86_pmu.lbr_nr = 0;
   6407	for (i = 0; i < x86_pmu.lbr_nr; i++) {
   6408		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
   6409		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
   6410			x86_pmu.lbr_nr = 0;
   6411	}
   6412
   6413	if (x86_pmu.lbr_nr) {
   6414		intel_pmu_lbr_init();
   6415
   6416		pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
   6417
   6418		/* only support branch_stack snapshot for perfmon >= v2 */
   6419		if (x86_pmu.disable_all == intel_pmu_disable_all) {
   6420			if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) {
   6421				static_call_update(perf_snapshot_branch_stack,
   6422						   intel_pmu_snapshot_arch_branch_stack);
   6423			} else {
   6424				static_call_update(perf_snapshot_branch_stack,
   6425						   intel_pmu_snapshot_branch_stack);
   6426			}
   6427		}
   6428	}
   6429
   6430	intel_pmu_check_extra_regs(x86_pmu.extra_regs);
   6431
   6432	/* Support full width counters using alternative MSR range */
   6433	if (x86_pmu.intel_cap.full_width_write) {
   6434		x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
   6435		x86_pmu.perfctr = MSR_IA32_PMC0;
   6436		pr_cont("full-width counters, ");
   6437	}
   6438
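        	/* Set the PERF_METRICS enable bit in the cached global control mask. */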
   6439	if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
   6440		x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
   6441
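        	/* Repeat the checks above for each hybrid sub-PMU. */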
   6442	if (is_hybrid())
   6443		intel_pmu_check_hybrid_pmus((u64)fixed_mask);
   6444
   6445	intel_aux_output_init();
   6446
   6447	return 0;
   6448}
   6449
    6450/*
    6451 * HT bug: phase 2 init
    6452 * Called once we have valid topology information to check
    6453 * whether or not HT is enabled.
    6454 * If HT is off, then we disable the workaround.
    6455 */
   6456static __init int fixup_ht_bug(void)
   6457{
   6458	int c;
   6459	/*
   6460	 * problem not present on this CPU model, nothing to do
   6461	 */
   6462	if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
   6463		return 0;
   6464
   6465	if (topology_max_smt_threads() > 1) {
   6466		pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
   6467		return 0;
   6468	}
   6469
   6470	cpus_read_lock();
   6471
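        	/*
        	 * The NMI watchdog is a perf event; park it while the exclusive
        	 * counter scheduling hooks are torn down, then restart it.
        	 */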
   6472	hardlockup_detector_perf_stop();
   6473
   6474	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
   6475
   6476	x86_pmu.start_scheduling = NULL;
   6477	x86_pmu.commit_scheduling = NULL;
   6478	x86_pmu.stop_scheduling = NULL;
   6479
   6480	hardlockup_detector_perf_restart();
   6481
   6482	for_each_online_cpu(c)
   6483		free_excl_cntrs(&per_cpu(cpu_hw_events, c));
   6484
   6485	cpus_read_unlock();
   6486	pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
   6487	return 0;
   6488}
   6489subsys_initcall(fixup_ht_bug)