cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

brs.c (8404B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2,/* reserved - must be 0x3 */
			vb:1,     /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,    /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
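/*
 * Illustrative note: the FROM/TO sample MSRs are interleaved per entry,
 * e.g. entry 0 uses MSR_AMD_SAMP_BR_FROM and MSR_AMD_SAMP_BR_FROM + 1,
 * entry 1 uses MSR_AMD_SAMP_BR_FROM + 2 and MSR_AMD_SAMP_BR_FROM + 3.
 */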

static inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
}

static inline u64 get_debug_extn_cfg(void)
{
	u64 val;

	rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
	return val;
}

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * Current BRS implementation does not support branch type or privilege level
 * filtering. Therefore, this function simply enforces these limitations. No need for
 * a br_sel_map. Software filtering is not supported because it would not correlate well
 * with a sampling period.
 */
int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}
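/*
 * Example (illustrative): an event with branch_sample_type =
 * PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER passes this filter,
 * whereas requesting e.g. PERF_SAMPLE_BRANCH_ANY_CALL is rejected with
 * -EINVAL because only "all branches" capture is supported.
 */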

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff is the index of the next entry to write, so top-of-stack
	 * is one below it; when BRS is full, msroff wraps back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}
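/*
 * Example (illustrative): with a 16-deep stack, msroff == 5 means entry 4
 * is the most recent one; msroff == 0 means the buffer wrapped and
 * entry 15 is on top.
 */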

/*
 * make sure we have a sane BRS offset to begin with
 * especially with kexec
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}

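/* Enable BRS on this CPU only if there are active branch stack users */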
void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * When coming in on interrupt and BRS is full, then hw will have
	 * already stopped BRS, no need to issue wrmsr again
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

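/* Disable BRS on this CPU if it was enabled for branch stack users */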
void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	if (cpuc->lbr_users)
		amd_brs_disable();
}

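/*
 * Software privilege level filtering: drop a recorded branch when its
 * target does not match the user/kernel levels requested by the event.
 */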
static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return 0;

	if (!(type & plm_u) && !kernel_ip(to))
		return 0;

	return 1;
}

/*
 * Drain valid BRS entries into cpuc->lbr_entries.
 * Caller must ensure amd_brs_inuse() is true before calling.
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * BRS event forced on PMC0,
	 * so check if there is an event.
	 * It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to long latency PMU irq
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check [0-x86_pmu.lbr_nr] */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msr.off points to next entry to be written
	 * tos = most recent entry index = msr.off - 1
	 * BRS register buffer saturates, so we know we have
	 * start < tos and that we have to read from start to tos
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS is only one pass (saturation) from MSROFF to depth-1
	 * MSROFF wraps to zero when buffer is full
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization
		 */
		to = (u64)(((s64)to << shift) >> shift);

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br+nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}

/*
 * Poison most recent entry to prevent reuse by next task
 * required because BRS entries are not tagged by PID
 */
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}

/*
 * On context switch in, we need to make sure no samples from previous user
 * are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started
 * On ctxswout, sched_in = false, called before the PMU is stopped
 */
void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from previous BRS user on that CPU, so we poison the buffer as
	 * a faster way compared to resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * called from ACPI processor_idle.c or acpi_pad.c
 * with interrupts disabled
 */
void perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * On mwait in, we may end up in a non-C0 state.
	 * We must disable branch sampling to avoid holding the NMI
	 * for too long. We disable it in hardware but we
	 * keep the state in cpuc, so we can re-enable.
	 *
	 * The hardware will deliver the NMI if needed when brsmen is cleared
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

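/*
 * The low-power callback is published as a static call so the idle path
 * can invoke it cheaply. It starts out NULL and is pointed at
 * perf_amd_brs_lopwr_cb() by amd_brs_lopwr_init().
 */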
DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}