cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

core.c (39421B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2#include <linux/perf_event.h>
      3#include <linux/jump_label.h>
      4#include <linux/export.h>
      5#include <linux/types.h>
      6#include <linux/init.h>
      7#include <linux/slab.h>
      8#include <linux/delay.h>
      9#include <linux/jiffies.h>
     10#include <asm/apicdef.h>
     11#include <asm/apic.h>
     12#include <asm/nmi.h>
     13
     14#include "../perf_event.h"
     15
     16static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
     17static unsigned long perf_nmi_window;
     18
     19/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
     20#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
     21#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
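       /*
        * The 12-bit event select 0xFFF is split across PERF_CTL: event
        * bits [7:0] go in EventSelect[7:0] and bits [11:8] in
        * EventSelect[11:8], which sits at MSR bits [35:32], hence the
        * (0xF << 32) | 0xFF encoding above.
        */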
     22
     23/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
     24static u64 amd_pmu_global_cntr_mask __read_mostly;
     25
     26static __initconst const u64 amd_hw_cache_event_ids
     27				[PERF_COUNT_HW_CACHE_MAX]
     28				[PERF_COUNT_HW_CACHE_OP_MAX]
     29				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
     30{
     31 [ C(L1D) ] = {
     32	[ C(OP_READ) ] = {
     33		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
     34		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
     35	},
     36	[ C(OP_WRITE) ] = {
     37		[ C(RESULT_ACCESS) ] = 0,
     38		[ C(RESULT_MISS)   ] = 0,
     39	},
     40	[ C(OP_PREFETCH) ] = {
     41		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
     42		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
     43	},
     44 },
     45 [ C(L1I ) ] = {
     46	[ C(OP_READ) ] = {
     47		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
     48		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
     49	},
     50	[ C(OP_WRITE) ] = {
     51		[ C(RESULT_ACCESS) ] = -1,
     52		[ C(RESULT_MISS)   ] = -1,
     53	},
     54	[ C(OP_PREFETCH) ] = {
     55		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
     56		[ C(RESULT_MISS)   ] = 0,
     57	},
     58 },
     59 [ C(LL  ) ] = {
     60	[ C(OP_READ) ] = {
     61		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
     62		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
     63	},
     64	[ C(OP_WRITE) ] = {
     65		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
     66		[ C(RESULT_MISS)   ] = 0,
     67	},
     68	[ C(OP_PREFETCH) ] = {
     69		[ C(RESULT_ACCESS) ] = 0,
     70		[ C(RESULT_MISS)   ] = 0,
     71	},
     72 },
     73 [ C(DTLB) ] = {
     74	[ C(OP_READ) ] = {
     75		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
     76		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
     77	},
     78	[ C(OP_WRITE) ] = {
     79		[ C(RESULT_ACCESS) ] = 0,
     80		[ C(RESULT_MISS)   ] = 0,
     81	},
     82	[ C(OP_PREFETCH) ] = {
     83		[ C(RESULT_ACCESS) ] = 0,
     84		[ C(RESULT_MISS)   ] = 0,
     85	},
     86 },
     87 [ C(ITLB) ] = {
     88	[ C(OP_READ) ] = {
     89		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
     90		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
     91	},
     92	[ C(OP_WRITE) ] = {
     93		[ C(RESULT_ACCESS) ] = -1,
     94		[ C(RESULT_MISS)   ] = -1,
     95	},
     96	[ C(OP_PREFETCH) ] = {
     97		[ C(RESULT_ACCESS) ] = -1,
     98		[ C(RESULT_MISS)   ] = -1,
     99	},
    100 },
    101 [ C(BPU ) ] = {
    102	[ C(OP_READ) ] = {
    103		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
    104		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
    105	},
    106	[ C(OP_WRITE) ] = {
    107		[ C(RESULT_ACCESS) ] = -1,
    108		[ C(RESULT_MISS)   ] = -1,
    109	},
    110	[ C(OP_PREFETCH) ] = {
    111		[ C(RESULT_ACCESS) ] = -1,
    112		[ C(RESULT_MISS)   ] = -1,
    113	},
    114 },
    115 [ C(NODE) ] = {
    116	[ C(OP_READ) ] = {
    117		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
    118		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
    119	},
    120	[ C(OP_WRITE) ] = {
    121		[ C(RESULT_ACCESS) ] = -1,
    122		[ C(RESULT_MISS)   ] = -1,
    123	},
    124	[ C(OP_PREFETCH) ] = {
    125		[ C(RESULT_ACCESS) ] = -1,
    126		[ C(RESULT_MISS)   ] = -1,
    127	},
    128 },
    129};
    130
    131static __initconst const u64 amd_hw_cache_event_ids_f17h
    132				[PERF_COUNT_HW_CACHE_MAX]
    133				[PERF_COUNT_HW_CACHE_OP_MAX]
    134				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
    135[C(L1D)] = {
    136	[C(OP_READ)] = {
    137		[C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
    138		[C(RESULT_MISS)]   = 0xc860, /* L2$ access from DC Miss */
    139	},
    140	[C(OP_WRITE)] = {
    141		[C(RESULT_ACCESS)] = 0,
    142		[C(RESULT_MISS)]   = 0,
    143	},
    144	[C(OP_PREFETCH)] = {
    145		[C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
    146		[C(RESULT_MISS)]   = 0,
    147	},
    148},
    149[C(L1I)] = {
    150	[C(OP_READ)] = {
    151		[C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches  */
    152		[C(RESULT_MISS)]   = 0x0081, /* Instruction cache misses   */
    153	},
    154	[C(OP_WRITE)] = {
    155		[C(RESULT_ACCESS)] = -1,
    156		[C(RESULT_MISS)]   = -1,
    157	},
    158	[C(OP_PREFETCH)] = {
    159		[C(RESULT_ACCESS)] = 0,
    160		[C(RESULT_MISS)]   = 0,
    161	},
    162},
    163[C(LL)] = {
    164	[C(OP_READ)] = {
    165		[C(RESULT_ACCESS)] = 0,
    166		[C(RESULT_MISS)]   = 0,
    167	},
    168	[C(OP_WRITE)] = {
    169		[C(RESULT_ACCESS)] = 0,
    170		[C(RESULT_MISS)]   = 0,
    171	},
    172	[C(OP_PREFETCH)] = {
    173		[C(RESULT_ACCESS)] = 0,
    174		[C(RESULT_MISS)]   = 0,
    175	},
    176},
    177[C(DTLB)] = {
    178	[C(OP_READ)] = {
    179		[C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
    180		[C(RESULT_MISS)]   = 0xf045, /* L2 DTLB misses (PT walks) */
    181	},
    182	[C(OP_WRITE)] = {
    183		[C(RESULT_ACCESS)] = 0,
    184		[C(RESULT_MISS)]   = 0,
    185	},
    186	[C(OP_PREFETCH)] = {
    187		[C(RESULT_ACCESS)] = 0,
    188		[C(RESULT_MISS)]   = 0,
    189	},
    190},
    191[C(ITLB)] = {
    192	[C(OP_READ)] = {
    193		[C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
    194		[C(RESULT_MISS)]   = 0xff85, /* L1 ITLB misses, L2 misses */
    195	},
    196	[C(OP_WRITE)] = {
    197		[C(RESULT_ACCESS)] = -1,
    198		[C(RESULT_MISS)]   = -1,
    199	},
    200	[C(OP_PREFETCH)] = {
    201		[C(RESULT_ACCESS)] = -1,
    202		[C(RESULT_MISS)]   = -1,
    203	},
    204},
    205[C(BPU)] = {
    206	[C(OP_READ)] = {
    207		[C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr.      */
    208		[C(RESULT_MISS)]   = 0x00c3, /* Retired Mispredicted BI    */
    209	},
    210	[C(OP_WRITE)] = {
    211		[C(RESULT_ACCESS)] = -1,
    212		[C(RESULT_MISS)]   = -1,
    213	},
    214	[C(OP_PREFETCH)] = {
    215		[C(RESULT_ACCESS)] = -1,
    216		[C(RESULT_MISS)]   = -1,
    217	},
    218},
    219[C(NODE)] = {
    220	[C(OP_READ)] = {
    221		[C(RESULT_ACCESS)] = 0,
    222		[C(RESULT_MISS)]   = 0,
    223	},
    224	[C(OP_WRITE)] = {
    225		[C(RESULT_ACCESS)] = -1,
    226		[C(RESULT_MISS)]   = -1,
    227	},
    228	[C(OP_PREFETCH)] = {
    229		[C(RESULT_ACCESS)] = -1,
    230		[C(RESULT_MISS)]   = -1,
    231	},
    232},
    233};
    234
    235/*
    236 * AMD Performance Monitor K7 and later, up to and including Family 16h:
    237 */
    238static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
    239{
    240	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
    241	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
    242	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x077d,
    243	[PERF_COUNT_HW_CACHE_MISSES]		= 0x077e,
    244	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
    245	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
    246	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
    247	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
    248};
    249
    250/*
    251 * AMD Performance Monitor Family 17h and later:
    252 */
    253static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
    254{
    255	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
    256	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
    257	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
    258	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
    259	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
    260	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
    261	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x0287,
    262	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x0187,
    263};
    264
    265static u64 amd_pmu_event_map(int hw_event)
    266{
    267	if (boot_cpu_data.x86 >= 0x17)
    268		return amd_f17h_perfmon_event_map[hw_event];
    269
    270	return amd_perfmon_event_map[hw_event];
    271}
    272
    273/*
    274 * Previously calculated offsets
    275 */
    276static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
    277static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
    278
    279/*
    280 * Legacy CPUs:
    281 *   4 counters starting at 0xc0010000 each offset by 1
    282 *
    283 * CPUs with core performance counter extensions:
    284 *   6 counters starting at 0xc0010200 each offset by 2
    285 */
    286static inline int amd_pmu_addr_offset(int index, bool eventsel)
    287{
    288	int offset;
    289
    290	if (!index)
    291		return index;
    292
    293	if (eventsel)
    294		offset = event_offsets[index];
    295	else
    296		offset = count_offsets[index];
    297
    298	if (offset)
    299		return offset;
    300
    301	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
    302		offset = index;
    303	else
    304		offset = index << 1;
    305
    306	if (eventsel)
    307		event_offsets[index] = offset;
    308	else
    309		count_offsets[index] = offset;
    310
    311	return offset;
    312}
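       /*
        * Worked example: with X86_FEATURE_PERFCTR_CORE, index 2 yields
        * offset 2 << 1 = 4, i.e. the event select at
        * MSR_F15H_PERF_CTL + 4 = 0xc0010204 and the counter at
        * MSR_F15H_PERF_CTR + 4 = 0xc0010205. On legacy CPUs the same
        * index maps to 0xc0010002 / 0xc0010006.
        */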
    313
    314/*
    315 * AMD64 events are detected based on their event codes.
    316 */
    317static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
    318{
    319	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
    320}
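       /*
        * The code combines EventSelect[11:8] (config bits [35:32]) with
        * EventSelect[7:0]: e.g. config bits [35:32] = 0x1 and [7:0] = 0xD6
        * give event code 0x1D6.
        */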
    321
    322static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
    323{
    324	if (!(x86_pmu.flags & PMU_FL_PAIR))
    325		return false;
    326
    327	switch (amd_get_event_code(hwc)) {
    328	case 0x003:	return true;	/* Retired SSE/AVX FLOPs */
    329	default:	return false;
    330	}
    331}
    332
    333#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
    334static inline int amd_is_brs_event(struct perf_event *e)
    335{
    336	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
    337}
    338
    339static int amd_core_hw_config(struct perf_event *event)
    340{
    341	int ret = 0;
    342
    343	if (event->attr.exclude_host && event->attr.exclude_guest)
    344		/*
    345		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
    346		 * and will count in both modes. We don't want to count in that
    347		 * case so we emulate no-counting by setting US = OS = 0.
    348		 */
    349		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
    350				      ARCH_PERFMON_EVENTSEL_OS);
    351	else if (event->attr.exclude_host)
    352		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
    353	else if (event->attr.exclude_guest)
    354		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
    355
    356	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
    357		event->hw.flags |= PERF_X86_EVENT_PAIR;
    358
    359	/*
    360	 * if branch stack is requested
    361	 */
    362	if (has_branch_stack(event)) {
    363		/*
    364		 * Due to interrupt holding, BRS is not recommended in
    365		 * counting mode.
    366		 */
    367		if (!is_sampling_event(event))
    368			return -EINVAL;
    369
    370		/*
    371		 * Due to the way BRS operates by holding the interrupt until
    372		 * lbr_nr entries have been captured, it does not make sense
    373		 * to allow sampling on BRS with an event that does not match
    374		 * what BRS is capturing, i.e., retired taken branches.
    375		 * Otherwise the correlation with the event's period is even
    376		 * more loose:
    377		 *
    378		 * With retired taken branch:
    379		 *   Effective P = P + 16 + X
    380		 * With any other event:
    381		 *   Effective P = P + Y + X
    382		 *
    383		 * Where X is the number of taken branches due to interrupt
    384		 * skid. Skid is large.
    385		 *
    386		 * Where Y is the occurrences of the event while BRS is
    387		 * capturing the lbr_nr entries.
    388		 *
    389		 * By using retired taken branches, we limit the impact on the
    390		 * Y variable. We know it cannot be more than the depth of
    391		 * BRS.
    392		 */
    393		if (!amd_is_brs_event(event))
    394			return -EINVAL;
    395
    396		/*
    397		 * BRS implementation does not work with frequency mode
    398		 * reprogramming of the period.
    399		 */
    400		if (event->attr.freq)
    401			return -EINVAL;
    402		/*
    403		 * The kernel subtracts BRS depth from period, so it must
    404		 * be big enough.
    405		 */
    406		if (event->attr.sample_period <= x86_pmu.lbr_nr)
    407			return -EINVAL;
    408
    409		/*
    410		 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
    411		 */
    412		ret = amd_brs_setup_filter(event);
    413
    414		/* only set in case of success */
    415		if (!ret)
    416			event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
    417	}
    418	return ret;
    419}
    420
    421static inline int amd_is_nb_event(struct hw_perf_event *hwc)
    422{
    423	return (hwc->config & 0xe0) == 0xe0;
    424}
    425
    426static inline int amd_has_nb(struct cpu_hw_events *cpuc)
    427{
    428	struct amd_nb *nb = cpuc->amd_nb;
    429
    430	return nb && nb->nb_id != -1;
    431}
    432
    433static int amd_pmu_hw_config(struct perf_event *event)
    434{
    435	int ret;
    436
    437	/* pass precise event sampling to ibs: */
    438	if (event->attr.precise_ip && get_ibs_caps())
    439		return -ENOENT;
    440
    441	if (has_branch_stack(event) && !x86_pmu.lbr_nr)
    442		return -EOPNOTSUPP;
    443
    444	ret = x86_pmu_hw_config(event);
    445	if (ret)
    446		return ret;
    447
    448	if (event->attr.type == PERF_TYPE_RAW)
    449		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
    450
    451	return amd_core_hw_config(event);
    452}
    453
    454static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
    455					   struct perf_event *event)
    456{
    457	struct amd_nb *nb = cpuc->amd_nb;
    458	int i;
    459
    460	/*
    461	 * need to scan whole list because event may not have
    462	 * been assigned during scheduling
    463	 *
    464	 * no race condition possible because event can only
    465	 * be removed on one CPU at a time AND PMU is disabled
    466	 * when we come here
    467	 */
    468	for (i = 0; i < x86_pmu.num_counters; i++) {
    469		if (cmpxchg(nb->owners + i, event, NULL) == event)
    470			break;
    471	}
    472}
    473
    474 /*
    475  * AMD64 NorthBridge events need special treatment because
    476  * counter access needs to be synchronized across all cores
    477  * of a package. Refer to BKDG section 3.12
    478  *
    479  * NB events are events measuring L3 cache and HyperTransport
    480  * traffic. They are identified by an event code >= 0xe0.
    481  * They measure events on the NorthBridge, which is shared
    482  * by all cores on a package. NB events are counted on a
    483  * shared set of counters. When a NB event is programmed
    484  * in a counter, the data actually comes from a shared
    485  * counter. Thus, access to those counters needs to be
    486  * synchronized.
    487  *
    488  * We implement the synchronization such that no two cores
    489  * can be measuring NB events using the same counters. Thus,
    490  * we maintain a per-NB allocation table. The available slot
    491  * is propagated using the event_constraint structure.
    492  *
    493  * We provide only one choice for each NB event based on
    494  * the fact that only NB events have restrictions. Consequently,
    495  * if a counter is available, there is a guarantee the NB event
    496  * will be assigned to it. If no slot is available, an empty
    497  * constraint is returned and scheduling will eventually fail
    498  * for this event.
    499  *
    500  * Note that all cores attached to the same NB compete for the same
    501  * counters to host NB events, this is why we use atomic ops. Some
    502  * multi-chip CPUs may have more than one NB.
    503  *
    504  * Given that resources are allocated (cmpxchg), they must be
    505  * eventually freed for others to use. This is accomplished by
    506  * calling __amd_put_nb_event_constraints()
    507  *
    508  * Non NB events are not impacted by this restriction.
    509  */
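       /*
        * Concretely: each NB keeps an owners[] array indexed by counter. A
        * core claims slot i with cmpxchg(&nb->owners[i], NULL, event) below
        * and releases it with the matching cmpxchg(..., event, NULL) in
        * __amd_put_nb_event_constraints().
        */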
    510static struct event_constraint *
    511__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
    512			       struct event_constraint *c)
    513{
    514	struct hw_perf_event *hwc = &event->hw;
    515	struct amd_nb *nb = cpuc->amd_nb;
    516	struct perf_event *old;
    517	int idx, new = -1;
    518
    519	if (!c)
    520		c = &unconstrained;
    521
    522	if (cpuc->is_fake)
    523		return c;
    524
    525	/*
    526	 * detect if already present, if so reuse
    527	 *
    528	 * cannot merge with actual allocation
    529	 * because of possible holes
    530	 *
    531	 * event can already be present yet not assigned (in hwc->idx)
    532	 * because of successive calls to x86_schedule_events() from
    533	 * hw_perf_group_sched_in() without hw_perf_enable()
    534	 */
    535	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
    536		if (new == -1 || hwc->idx == idx)
    537			/* assign free slot, prefer hwc->idx */
    538			old = cmpxchg(nb->owners + idx, NULL, event);
    539		else if (nb->owners[idx] == event)
    540			/* event already present */
    541			old = event;
    542		else
    543			continue;
    544
    545		if (old && old != event)
    546			continue;
    547
    548		/* reassign to this slot */
    549		if (new != -1)
    550			cmpxchg(nb->owners + new, event, NULL);
    551		new = idx;
    552
    553		/* already present, reuse */
    554		if (old == event)
    555			break;
    556	}
    557
    558	if (new == -1)
    559		return &emptyconstraint;
    560
    561	return &nb->event_constraints[new];
    562}
    563
    564static struct amd_nb *amd_alloc_nb(int cpu)
    565{
    566	struct amd_nb *nb;
    567	int i;
    568
    569	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
    570	if (!nb)
    571		return NULL;
    572
    573	nb->nb_id = -1;
    574
    575	/*
    576	 * initialize all possible NB constraints
    577	 */
    578	for (i = 0; i < x86_pmu.num_counters; i++) {
    579		__set_bit(i, nb->event_constraints[i].idxmsk);
    580		nb->event_constraints[i].weight = 1;
    581	}
    582	return nb;
    583}
    584
    585static void amd_pmu_cpu_reset(int cpu)
    586{
    587	if (x86_pmu.version < 2)
    588		return;
    589
    590	/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
    591	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
    592
    593	/* Clear overflow bits i.e. PerfCntrGlobalStatus.PerfCntrOvfl */
    594	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
    595}
    596
    597static int amd_pmu_cpu_prepare(int cpu)
    598{
    599	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
    600
    601	WARN_ON_ONCE(cpuc->amd_nb);
    602
    603	if (!x86_pmu.amd_nb_constraints)
    604		return 0;
    605
    606	cpuc->amd_nb = amd_alloc_nb(cpu);
    607	if (!cpuc->amd_nb)
    608		return -ENOMEM;
    609
    610	return 0;
    611}
    612
    613static void amd_pmu_cpu_starting(int cpu)
    614{
    615	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
    616	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
    617	struct amd_nb *nb;
    618	int i, nb_id;
    619
    620	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
    621
    622	if (!x86_pmu.amd_nb_constraints)
    623		return;
    624
    625	nb_id = topology_die_id(cpu);
    626	WARN_ON_ONCE(nb_id == BAD_APICID);
    627
    628	for_each_online_cpu(i) {
    629		nb = per_cpu(cpu_hw_events, i).amd_nb;
    630		if (WARN_ON_ONCE(!nb))
    631			continue;
    632
    633		if (nb->nb_id == nb_id) {
    634			*onln = cpuc->amd_nb;
    635			cpuc->amd_nb = nb;
    636			break;
    637		}
    638	}
    639
    640	cpuc->amd_nb->nb_id = nb_id;
    641	cpuc->amd_nb->refcnt++;
    642
    643	amd_brs_reset();
    644	amd_pmu_cpu_reset(cpu);
    645}
    646
    647static void amd_pmu_cpu_dead(int cpu)
    648{
    649	struct cpu_hw_events *cpuhw;
    650
    651	if (!x86_pmu.amd_nb_constraints)
    652		return;
    653
    654	cpuhw = &per_cpu(cpu_hw_events, cpu);
    655
    656	if (cpuhw->amd_nb) {
    657		struct amd_nb *nb = cpuhw->amd_nb;
    658
    659		if (nb->nb_id == -1 || --nb->refcnt == 0)
    660			kfree(nb);
    661
    662		cpuhw->amd_nb = NULL;
    663	}
    664
    665	amd_pmu_cpu_reset(cpu);
    666}
    667
    668static inline void amd_pmu_set_global_ctl(u64 ctl)
    669{
    670	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
    671}
    672
    673static inline u64 amd_pmu_get_global_status(void)
    674{
    675	u64 status;
    676
    677	/* PerfCntrGlobalStatus is read-only */
    678	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
    679
    680	return status & amd_pmu_global_cntr_mask;
    681}
    682
    683static inline void amd_pmu_ack_global_status(u64 status)
    684{
    685	/*
    686	 * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
    687	 * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
    688	 * clears the same bit in PerfCntrGlobalStatus
    689	 */
    690
    691	/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
    692	status &= amd_pmu_global_cntr_mask;
    693	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
    694}
    695
    696static bool amd_pmu_test_overflow_topbit(int idx)
    697{
    698	u64 counter;
    699
    700	rdmsrl(x86_pmu_event_addr(idx), counter);
    701
    702	return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
    703}
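       /*
        * Counters are armed with -(sample period), and max_period keeps
        * bit 47 set in the programmed value, so a clear top bit here means
        * the counter has wrapped and an overflow is still pending.
        */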
    704
    705static bool amd_pmu_test_overflow_status(int idx)
    706{
    707	return amd_pmu_get_global_status() & BIT_ULL(idx);
    708}
    709
    710DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
    711
    712/*
    713 * When a PMC counter overflows, an NMI is used to process the event and
    714 * reset the counter. NMI latency can result in the counter being updated
    715 * before the NMI can run, which can result in what appear to be spurious
    716 * NMIs. This function is intended to wait for the NMI to run and reset
    717 * the counter to avoid possible unhandled NMI messages.
    718 */
    719#define OVERFLOW_WAIT_COUNT	50
    720
    721static void amd_pmu_wait_on_overflow(int idx)
    722{
    723	unsigned int i;
    724
    725	/*
    726	 * Wait for the counter to be reset if it has overflowed. This loop
    727	 * should exit very, very quickly, but just in case, don't wait
    728	 * forever...
    729	 */
    730	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
    731		if (!static_call(amd_pmu_test_overflow)(idx))
    732			break;
    733
    734		/* Might be in IRQ context, so can't sleep */
    735		udelay(1);
    736	}
    737}
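       /*
        * Bounded busy-wait: at most OVERFLOW_WAIT_COUNT iterations of
        * udelay(1), i.e. roughly 50us per overflowed counter.
        */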
    738
    739static void amd_pmu_check_overflow(void)
    740{
    741	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    742	int idx;
    743
    744	/*
    745	 * This shouldn't be called from NMI context, but add a safeguard here
    746	 * to return, since if we're in NMI context we can't wait for an NMI
    747	 * to reset an overflowed counter value.
    748	 */
    749	if (in_nmi())
    750		return;
    751
    752	/*
    753	 * Check each counter for overflow and wait for it to be reset by the
    754	 * NMI if it has overflowed. This relies on the fact that all active
    755	 * counters are always enabled when this function is called and
    756	 * ARCH_PERFMON_EVENTSEL_INT is always set.
    757	 */
    758	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
    759		if (!test_bit(idx, cpuc->active_mask))
    760			continue;
    761
    762		amd_pmu_wait_on_overflow(idx);
    763	}
    764}
    765
    766static void amd_pmu_enable_event(struct perf_event *event)
    767{
    768	x86_pmu_enable_event(event);
    769}
    770
    771static void amd_pmu_enable_all(int added)
    772{
    773	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    774	int idx;
    775
    776	amd_brs_enable_all();
    777
    778	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
    779		/* only activate events which are marked as active */
    780		if (!test_bit(idx, cpuc->active_mask))
    781			continue;
    782
    783		amd_pmu_enable_event(cpuc->events[idx]);
    784	}
    785}
    786
    787static void amd_pmu_v2_enable_event(struct perf_event *event)
    788{
    789	struct hw_perf_event *hwc = &event->hw;
    790
    791	/*
    792	 * Testing cpu_hw_events.enabled should be skipped in this case unlike
    793	 * in x86_pmu_enable_event().
    794	 *
    795	 * Since cpu_hw_events.enabled is set only after returning from
    796	 * x86_pmu_start(), the PMCs must be programmed and kept ready.
    797	 * Counting starts only after x86_pmu_enable_all() is called.
    798	 */
    799	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
    800}
    801
    802static void amd_pmu_v2_enable_all(int added)
    803{
    804	amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
    805}
    806
    807static void amd_pmu_disable_event(struct perf_event *event)
    808{
    809	x86_pmu_disable_event(event);
    810
    811	/*
    812	 * This can be called from NMI context (via x86_pmu_stop). The counter
    813	 * may have overflowed, but either way, we'll never see it get reset
    814	 * by the NMI if we're already in the NMI. And the NMI latency support
    815	 * below will take care of any pending NMI that might have been
    816	 * generated by the overflow.
    817	 */
    818	if (in_nmi())
    819		return;
    820
    821	amd_pmu_wait_on_overflow(event->hw.idx);
    822}
    823
    824static void amd_pmu_disable_all(void)
    825{
    826	amd_brs_disable_all();
    827	x86_pmu_disable_all();
    828	amd_pmu_check_overflow();
    829}
    830
    831static void amd_pmu_v2_disable_all(void)
    832{
    833	/* Disable all PMCs */
    834	amd_pmu_set_global_ctl(0);
    835	amd_pmu_check_overflow();
    836}
    837
    838static void amd_pmu_add_event(struct perf_event *event)
    839{
    840	if (needs_branch_stack(event))
    841		amd_pmu_brs_add(event);
    842}
    843
    844static void amd_pmu_del_event(struct perf_event *event)
    845{
    846	if (needs_branch_stack(event))
    847		amd_pmu_brs_del(event);
    848}
    849
    850/*
    851 * Because of NMI latency, if multiple PMC counters are active or other sources
    852 * of NMIs are received, the perf NMI handler can handle one or more overflowed
    853 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
    854 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
    855 * back-to-back NMI support won't be active. This PMC handler needs to take into
    856 * account that this can occur, otherwise this could result in unknown NMI
    857 * messages being issued. Examples of this are PMC overflow while in the NMI
    858 * handler when multiple PMCs are active or PMC overflow while handling some
    859 * other source of an NMI.
    860 *
    861 * Attempt to mitigate this by creating an NMI window in which un-handled NMIs
    862 * received during this window will be claimed. This prevents extending the
    863 * window past when it is possible that latent NMIs should be received. The
    864 * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
    865 * handled a counter. When an un-handled NMI is received, it will be claimed
    866 * only if arriving within that window.
    867 */
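       /*
        * Example: with perf_nmi_window = msecs_to_jiffies(100), as set in
        * amd_core_pmu_init(), an otherwise unhandled NMI arriving within
        * roughly 100ms of the last handled PMC overflow is claimed as
        * NMI_HANDLED instead of being reported as an unknown NMI.
        */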
    868static inline int amd_pmu_adjust_nmi_window(int handled)
    869{
    870	/*
    871	 * If a counter was handled, record a timestamp such that un-handled
    872	 * NMIs will be claimed if arriving within that window.
    873	 */
    874	if (handled) {
    875		this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
    876
    877		return handled;
    878	}
    879
    880	if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
    881		return NMI_DONE;
    882
    883	return NMI_HANDLED;
    884}
    885
    886static int amd_pmu_handle_irq(struct pt_regs *regs)
    887{
    888	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    889	int handled;
    890	int pmu_enabled;
    891
    892	/*
    893	 * Save the PMU state.
    894	 * It needs to be restored when leaving the handler.
    895	 */
    896	pmu_enabled = cpuc->enabled;
    897	cpuc->enabled = 0;
    898
    899	/* stop everything (includes BRS) */
    900	amd_pmu_disable_all();
    901
    902	/* Drain BRS if in use (could be inactive) */
    903	if (cpuc->lbr_users)
    904		amd_brs_drain();
    905
    906	/* Process any counter overflows */
    907	handled = x86_pmu_handle_irq(regs);
    908
    909	cpuc->enabled = pmu_enabled;
    910	if (pmu_enabled)
    911		amd_pmu_enable_all(0);
    912
    913	return amd_pmu_adjust_nmi_window(handled);
    914}
    915
    916static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
    917{
    918	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    919	struct perf_sample_data data;
    920	struct hw_perf_event *hwc;
    921	struct perf_event *event;
    922	int handled = 0, idx;
    923	u64 status, mask;
    924	bool pmu_enabled;
    925
    926	/*
    927	 * Save the PMU state as it needs to be restored when leaving the
    928	 * handler
    929	 */
    930	pmu_enabled = cpuc->enabled;
    931	cpuc->enabled = 0;
    932
    933	/* Stop counting */
    934	amd_pmu_v2_disable_all();
    935
    936	status = amd_pmu_get_global_status();
    937
    938	/* Check if any overflows are pending */
    939	if (!status)
    940		goto done;
    941
    942	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
    943		if (!test_bit(idx, cpuc->active_mask))
    944			continue;
    945
    946		event = cpuc->events[idx];
    947		hwc = &event->hw;
    948		x86_perf_event_update(event);
    949		mask = BIT_ULL(idx);
    950
    951		if (!(status & mask))
    952			continue;
    953
    954		/* Event overflow */
    955		handled++;
    956		perf_sample_data_init(&data, 0, hwc->last_period);
    957
    958		if (!x86_perf_event_set_period(event))
    959			continue;
    960
    961		if (perf_event_overflow(event, &data, regs))
    962			x86_pmu_stop(event, 0);
    963
    964		status &= ~mask;
    965	}
    966
    967	/*
    968	 * It should never be the case that some overflows are not handled as
    969	 * the corresponding PMCs are expected to be inactive according to the
    970	 * active_mask
    971	 */
    972	WARN_ON(status > 0);
    973
    974	/* Clear overflow bits */
    975	amd_pmu_ack_global_status(~status);
    976
    977	/*
    978	 * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
    979	 * PMI entry is not set by the local APIC when a PMC overflow occurs
    980	 */
    981	inc_irq_stat(apic_perf_irqs);
    982
    983done:
    984	cpuc->enabled = pmu_enabled;
    985
    986	/* Resume counting only if PMU is active */
    987	if (pmu_enabled)
    988		amd_pmu_v2_enable_all(0);
    989
    990	return amd_pmu_adjust_nmi_window(handled);
    991}
    992
    993static struct event_constraint *
    994amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
    995			  struct perf_event *event)
    996{
    997	/*
    998	 * if not NB event or no NB, then no constraints
    999	 */
   1000	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
   1001		return &unconstrained;
   1002
   1003	return __amd_get_nb_event_constraints(cpuc, event, NULL);
   1004}
   1005
   1006static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
   1007				      struct perf_event *event)
   1008{
   1009	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
   1010		__amd_put_nb_event_constraints(cpuc, event);
   1011}
   1012
   1013PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
   1014PMU_FORMAT_ATTR(umask,	"config:8-15"	);
   1015PMU_FORMAT_ATTR(edge,	"config:18"	);
   1016PMU_FORMAT_ATTR(inv,	"config:23"	);
   1017PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
   1018
   1019static struct attribute *amd_format_attr[] = {
   1020	&format_attr_event.attr,
   1021	&format_attr_umask.attr,
   1022	&format_attr_edge.attr,
   1023	&format_attr_inv.attr,
   1024	&format_attr_cmask.attr,
   1025	NULL,
   1026};
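       /*
        * These describe the raw config layout exported under
        * /sys/bus/event_source/devices/cpu/format/ (the core PMU is
        * typically registered as "cpu"), so e.g. "perf stat -e
        * cpu/event=0x76/" programs EventSelect 0x076, the CPU cycles event
        * in the map above.
        */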
   1027
   1028/* AMD Family 15h */
   1029
   1030#define AMD_EVENT_TYPE_MASK	0x000000F0ULL
   1031
   1032#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
   1033#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
   1034#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
   1035#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
   1036#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
   1037#define AMD_EVENT_EX_LS		0x000000C0ULL
   1038#define AMD_EVENT_DE		0x000000D0ULL
   1039#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
   1040
   1041/*
   1042 * AMD family 15h event code/PMC mappings:
   1043 *
   1044 * type = event_code & 0x0F0:
   1045 *
   1046 * 0x000	FP	PERF_CTL[5:3]
   1047 * 0x010	FP	PERF_CTL[5:3]
   1048 * 0x020	LS	PERF_CTL[5:0]
   1049 * 0x030	LS	PERF_CTL[5:0]
   1050 * 0x040	DC	PERF_CTL[5:0]
   1051 * 0x050	DC	PERF_CTL[5:0]
   1052 * 0x060	CU	PERF_CTL[2:0]
   1053 * 0x070	CU	PERF_CTL[2:0]
   1054 * 0x080	IC/DE	PERF_CTL[2:0]
   1055 * 0x090	IC/DE	PERF_CTL[2:0]
   1056 * 0x0A0	---
   1057 * 0x0B0	---
   1058 * 0x0C0	EX/LS	PERF_CTL[5:0]
   1059 * 0x0D0	DE	PERF_CTL[2:0]
   1060 * 0x0E0	NB	NB_PERF_CTL[3:0]
   1061 * 0x0F0	NB	NB_PERF_CTL[3:0]
   1062 *
   1063 * Exceptions:
   1064 *
   1065 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
   1066 * 0x003	FP	PERF_CTL[3]
   1067 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
   1068 * 0x00B	FP	PERF_CTL[3]
   1069 * 0x00D	FP	PERF_CTL[3]
   1070 * 0x023	DE	PERF_CTL[2:0]
   1071 * 0x02D	LS	PERF_CTL[3]
   1072 * 0x02E	LS	PERF_CTL[3,0]
   1073 * 0x031	LS	PERF_CTL[2:0] (**)
   1074 * 0x043	CU	PERF_CTL[2:0]
   1075 * 0x045	CU	PERF_CTL[2:0]
   1076 * 0x046	CU	PERF_CTL[2:0]
   1077 * 0x054	CU	PERF_CTL[2:0]
   1078 * 0x055	CU	PERF_CTL[2:0]
   1079 * 0x08F	IC	PERF_CTL[0]
   1080 * 0x187	DE	PERF_CTL[0]
   1081 * 0x188	DE	PERF_CTL[0]
   1082 * 0x0DB	EX	PERF_CTL[5:0]
   1083 * 0x0DC	LS	PERF_CTL[5:0]
   1084 * 0x0DD	LS	PERF_CTL[5:0]
   1085 * 0x0DE	LS	PERF_CTL[5:0]
   1086 * 0x0DF	LS	PERF_CTL[5:0]
   1087 * 0x1C0	EX	PERF_CTL[5:3]
   1088 * 0x1D6	EX	PERF_CTL[5:0]
   1089 * 0x1D8	EX	PERF_CTL[5:0]
   1090 *
   1091 * (*)  depending on the umask all FPU counters may be used
   1092 * (**) only one unitmask enabled at a time
   1093 */
   1094
   1095static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
   1096static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
   1097static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
   1098static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
   1099static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
   1100static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
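       /*
        * The counter masks above decode as: 0x01 = PMC0, 0x07 = PMC[2:0],
        * 0x08 = PMC3, 0x09 = PMC3 or PMC0 (overlapping), 0x3F = PMC[5:0],
        * 0x38 = PMC[5:3].
        */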
   1101
   1102static struct event_constraint *
   1103amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
   1104			       struct perf_event *event)
   1105{
   1106	struct hw_perf_event *hwc = &event->hw;
   1107	unsigned int event_code = amd_get_event_code(hwc);
   1108
   1109	switch (event_code & AMD_EVENT_TYPE_MASK) {
   1110	case AMD_EVENT_FP:
   1111		switch (event_code) {
   1112		case 0x000:
   1113			if (!(hwc->config & 0x0000F000ULL))
   1114				break;
   1115			if (!(hwc->config & 0x00000F00ULL))
   1116				break;
   1117			return &amd_f15_PMC3;
   1118		case 0x004:
   1119			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
   1120				break;
   1121			return &amd_f15_PMC3;
   1122		case 0x003:
   1123		case 0x00B:
   1124		case 0x00D:
   1125			return &amd_f15_PMC3;
   1126		}
   1127		return &amd_f15_PMC53;
   1128	case AMD_EVENT_LS:
   1129	case AMD_EVENT_DC:
   1130	case AMD_EVENT_EX_LS:
   1131		switch (event_code) {
   1132		case 0x023:
   1133		case 0x043:
   1134		case 0x045:
   1135		case 0x046:
   1136		case 0x054:
   1137		case 0x055:
   1138			return &amd_f15_PMC20;
   1139		case 0x02D:
   1140			return &amd_f15_PMC3;
   1141		case 0x02E:
   1142			return &amd_f15_PMC30;
   1143		case 0x031:
   1144			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
   1145				return &amd_f15_PMC20;
   1146			return &emptyconstraint;
   1147		case 0x1C0:
   1148			return &amd_f15_PMC53;
   1149		default:
   1150			return &amd_f15_PMC50;
   1151		}
   1152	case AMD_EVENT_CU:
   1153	case AMD_EVENT_IC_DE:
   1154	case AMD_EVENT_DE:
   1155		switch (event_code) {
   1156		case 0x08F:
   1157		case 0x187:
   1158		case 0x188:
   1159			return &amd_f15_PMC0;
   1160		case 0x0DB ... 0x0DF:
   1161		case 0x1D6:
   1162		case 0x1D8:
   1163			return &amd_f15_PMC50;
   1164		default:
   1165			return &amd_f15_PMC20;
   1166		}
   1167	case AMD_EVENT_NB:
   1168		/* moved to uncore.c */
   1169		return &emptyconstraint;
   1170	default:
   1171		return &emptyconstraint;
   1172	}
   1173}
   1174
   1175static struct event_constraint pair_constraint;
   1176
   1177static struct event_constraint *
   1178amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
   1179			       struct perf_event *event)
   1180{
   1181	struct hw_perf_event *hwc = &event->hw;
   1182
   1183	if (amd_is_pair_event_code(hwc))
   1184		return &pair_constraint;
   1185
   1186	return &unconstrained;
   1187}
   1188
   1189static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
   1190					   struct perf_event *event)
   1191{
   1192	struct hw_perf_event *hwc = &event->hw;
   1193
   1194	if (is_counter_pair(hwc))
   1195		--cpuc->n_pair;
   1196}
   1197
   1198/*
   1199 * Because of the way BRS operates with inactive and active phases, and
   1200 * the link to one counter, it is not possible to have two events using BRS
   1201 * scheduled at the same time. There would be an issue with enforcing the
   1202 * period of each one and given that the BRS saturates, it would not be possible
   1203 * to guarantee correlated content for all events. Therefore, in situations
   1204 * where multiple events want to use BRS, the kernel enforces mutual exclusion.
   1205 * Exclusion is enforced by choosing only one counter for events using BRS.
   1206 * The event scheduling logic will then automatically multiplex the
   1207 * events and ensure that at most one event is actively using BRS.
   1208 *
   1209 * The BRS counter could be any counter, but there is no constraint on Fam19h,
   1210 * therefore all counters are equal and thus we pick the first one: PMC0
   1211 */
   1212static struct event_constraint amd_fam19h_brs_cntr0_constraint =
   1213	EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
   1214
   1215static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
   1216	__EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
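       /*
        * Both constraints carry idxmsk 0x1, i.e. only PMC0 may host a BRS
        * event, so the scheduler multiplexes multiple BRS users onto that
        * single counter as described above.
        */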
   1217
   1218static struct event_constraint *
   1219amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
   1220			  struct perf_event *event)
   1221{
   1222	struct hw_perf_event *hwc = &event->hw;
   1223	bool has_brs = has_amd_brs(hwc);
   1224
   1225	/*
   1226	 * In case BRS is used with an event requiring a counter pair,
   1227	 * the kernel allows it, but only on counters 0 & 1, to enforce
   1228	 * the multiplexing needed to protect BRS when there are
   1229	 * multiple BRS users.
   1230	 */
   1231	if (amd_is_pair_event_code(hwc)) {
   1232		return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
   1233			       : &pair_constraint;
   1234	}
   1235
   1236	if (has_brs)
   1237		return &amd_fam19h_brs_cntr0_constraint;
   1238
   1239	return &unconstrained;
   1240}
   1241
   1242
   1243static ssize_t amd_event_sysfs_show(char *page, u64 config)
   1244{
   1245	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
   1246		    (config & AMD64_EVENTSEL_EVENT) >> 24;
   1247
   1248	return x86_event_sysfs_show(page, config, event);
   1249}
   1250
   1251static void amd_pmu_sched_task(struct perf_event_context *ctx,
   1252				 bool sched_in)
   1253{
   1254	if (sched_in && x86_pmu.lbr_nr)
   1255		amd_pmu_brs_sched_task(ctx, sched_in);
   1256}
   1257
   1258static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
   1259{
   1260	/*
   1261	 * Decrease period by the depth of the BRS feature to get the last N
   1262	 * taken branches and approximate the desired period
   1263	 */
   1264	if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
   1265		left -= x86_pmu.lbr_nr;
   1266
   1267	return left;
   1268}
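       /*
        * Example (assuming the 16-entry BRS depth on Fam19h): a requested
        * period of 100000 arms the counter for 99984, so that together with
        * the taken branches captured while BRS drains the effective period
        * approximates the requested one.
        */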
   1269
   1270static __initconst const struct x86_pmu amd_pmu = {
   1271	.name			= "AMD",
   1272	.handle_irq		= amd_pmu_handle_irq,
   1273	.disable_all		= amd_pmu_disable_all,
   1274	.enable_all		= amd_pmu_enable_all,
   1275	.enable			= amd_pmu_enable_event,
   1276	.disable		= amd_pmu_disable_event,
   1277	.hw_config		= amd_pmu_hw_config,
   1278	.schedule_events	= x86_schedule_events,
   1279	.eventsel		= MSR_K7_EVNTSEL0,
   1280	.perfctr		= MSR_K7_PERFCTR0,
   1281	.addr_offset            = amd_pmu_addr_offset,
   1282	.event_map		= amd_pmu_event_map,
   1283	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
   1284	.num_counters		= AMD64_NUM_COUNTERS,
   1285	.add			= amd_pmu_add_event,
   1286	.del			= amd_pmu_del_event,
   1287	.cntval_bits		= 48,
   1288	.cntval_mask		= (1ULL << 48) - 1,
   1289	.apic			= 1,
   1290	/* use highest bit to detect overflow */
   1291	.max_period		= (1ULL << 47) - 1,
   1292	.get_event_constraints	= amd_get_event_constraints,
   1293	.put_event_constraints	= amd_put_event_constraints,
   1294
   1295	.format_attrs		= amd_format_attr,
   1296	.events_sysfs_show	= amd_event_sysfs_show,
   1297
   1298	.cpu_prepare		= amd_pmu_cpu_prepare,
   1299	.cpu_starting		= amd_pmu_cpu_starting,
   1300	.cpu_dead		= amd_pmu_cpu_dead,
   1301
   1302	.amd_nb_constraints	= 1,
   1303};
   1304
   1305static ssize_t branches_show(struct device *cdev,
   1306			      struct device_attribute *attr,
   1307			      char *buf)
   1308{
   1309	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
   1310}
   1311
   1312static DEVICE_ATTR_RO(branches);
   1313
   1314static struct attribute *amd_pmu_brs_attrs[] = {
   1315	&dev_attr_branches.attr,
   1316	NULL,
   1317};
   1318
   1319static umode_t
   1320amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
   1321{
   1322	return x86_pmu.lbr_nr ? attr->mode : 0;
   1323}
   1324
   1325static struct attribute_group group_caps_amd_brs = {
   1326	.name  = "caps",
   1327	.attrs = amd_pmu_brs_attrs,
   1328	.is_visible = amd_brs_is_visible,
   1329};
   1330
   1331EVENT_ATTR_STR(branch-brs, amd_branch_brs,
   1332	       "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
   1333
   1334static struct attribute *amd_brs_events_attrs[] = {
   1335	EVENT_PTR(amd_branch_brs),
   1336	NULL,
   1337};
   1338
   1339static struct attribute_group group_events_amd_brs = {
   1340	.name       = "events",
   1341	.attrs      = amd_brs_events_attrs,
   1342	.is_visible = amd_brs_is_visible,
   1343};
   1344
   1345static const struct attribute_group *amd_attr_update[] = {
   1346	&group_caps_amd_brs,
   1347	&group_events_amd_brs,
   1348	NULL,
   1349};
   1350
   1351static int __init amd_core_pmu_init(void)
   1352{
   1353	union cpuid_0x80000022_ebx ebx;
   1354	u64 even_ctr_mask = 0ULL;
   1355	int i;
   1356
   1357	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
   1358		return 0;
   1359
   1360	/* Avoid calculating the value each time in the NMI handler */
   1361	perf_nmi_window = msecs_to_jiffies(100);
   1362
   1363	/*
   1364	 * If core performance counter extensions exist, we must use
   1365	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
   1366	 * amd_pmu_addr_offset().
   1367	 */
   1368	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
   1369	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
   1370	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
   1371
   1372	/* Check for Performance Monitoring v2 support */
   1373	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
   1374		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
   1375
   1376		/* Update PMU version for later usage */
   1377		x86_pmu.version = 2;
   1378
   1379		/* Find the number of available Core PMCs */
   1380		x86_pmu.num_counters = ebx.split.num_core_pmc;
   1381
   1382		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
   1383
   1384		/* Update PMC handling functions */
   1385		x86_pmu.enable_all = amd_pmu_v2_enable_all;
   1386		x86_pmu.disable_all = amd_pmu_v2_disable_all;
   1387		x86_pmu.enable = amd_pmu_v2_enable_event;
   1388		x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
   1389		static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
   1390	}
   1391
   1392	/*
   1393	 * AMD Core perfctr has separate MSRs for the NB events, see
   1394	 * the amd/uncore.c driver.
   1395	 */
   1396	x86_pmu.amd_nb_constraints = 0;
   1397
   1398	if (boot_cpu_data.x86 == 0x15) {
   1399		pr_cont("Fam15h ");
   1400		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
   1401	}
   1402	if (boot_cpu_data.x86 >= 0x17) {
   1403		pr_cont("Fam17h+ ");
   1404		/*
   1405		 * Family 17h and compatibles have constraints for Large
   1406		 * Increment per Cycle events: they may only be assigned an
   1407		 * even numbered counter that has a consecutive adjacent odd
   1408		 * numbered counter following it.
   1409		 */
   1410		for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
   1411			even_ctr_mask |= 1 << i;
   1412
   1413		pair_constraint = (struct event_constraint)
   1414				    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
   1415				    x86_pmu.num_counters / 2, 0,
   1416				    PERF_X86_EVENT_PAIR);
   1417
   1418		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
   1419		x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
   1420		x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
   1421		x86_pmu.flags |= PMU_FL_PAIR;
   1422	}
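       /*
        * With six counters this yields even_ctr_mask = 0x15 (PMC0/2/4), so
        * a Large Increment per Cycle event may only start on an even
        * counter; the constraint weight is num_counters / 2 = 3 because
        * each such event also consumes the adjacent odd counter.
        */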
   1423
   1424	/*
   1425	 * BRS requires special event constraints and flushing on ctxsw.
   1426	 */
   1427	if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
   1428		x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
   1429		x86_pmu.sched_task = amd_pmu_sched_task;
   1430		x86_pmu.limit_period = amd_pmu_limit_period;
   1431		/*
   1432		 * put_event_constraints callback same as Fam17h, set above
   1433		 */
   1434
   1435		/* branch sampling must be stopped when entering low power */
   1436		amd_brs_lopwr_init();
   1437	}
   1438
   1439	x86_pmu.attr_update = amd_attr_update;
   1440
   1441	pr_cont("core perfctr, ");
   1442	return 0;
   1443}
   1444
   1445__init int amd_pmu_init(void)
   1446{
   1447	int ret;
   1448
   1449	/* Performance-monitoring supported from K7 and later: */
   1450	if (boot_cpu_data.x86 < 6)
   1451		return -ENODEV;
   1452
   1453	x86_pmu = amd_pmu;
   1454
   1455	ret = amd_core_pmu_init();
   1456	if (ret)
   1457		return ret;
   1458
   1459	if (num_possible_cpus() == 1) {
   1460		/*
   1461		 * No point in allocating data structures to serialize
   1462		 * against other CPUs, when there is only the one CPU.
   1463		 */
   1464		x86_pmu.amd_nb_constraints = 0;
   1465	}
   1466
   1467	if (boot_cpu_data.x86 >= 0x17)
   1468		memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
   1469	else
   1470		memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
   1471
   1472	return 0;
   1473}
   1474
   1475static inline void amd_pmu_reload_virt(void)
   1476{
   1477	if (x86_pmu.version >= 2) {
   1478		/*
   1479		 * Clear global enable bits, reprogram the PERF_CTL
   1480		 * registers with updated perf_ctr_virt_mask and then
   1481		 * set global enable bits once again
   1482		 */
   1483		amd_pmu_v2_disable_all();
   1484		amd_pmu_enable_all(0);
   1485		amd_pmu_v2_enable_all(0);
   1486		return;
   1487	}
   1488
   1489	amd_pmu_disable_all();
   1490	amd_pmu_enable_all(0);
   1491}
   1492
   1493void amd_pmu_enable_virt(void)
   1494{
   1495	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   1496
   1497	cpuc->perf_ctr_virt_mask = 0;
   1498
   1499	/* Reload all events */
   1500	amd_pmu_reload_virt();
   1501}
   1502EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
   1503
   1504void amd_pmu_disable_virt(void)
   1505{
   1506	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
   1507
   1508	/*
   1509	 * We only mask out the Host-only bit so that host-only counting works
   1510	 * when SVM is disabled. If someone sets up a guest-only counter when
   1511	 * SVM is disabled, the Guest-only bit still gets set and the counter
   1512	 * will not count anything.
   1513	 */
   1514	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
   1515
   1516	/* Reload all events */
   1517	amd_pmu_reload_virt();
   1518}
   1519EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);