cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pmu-emul.c (28924B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2015 Linaro Ltd.
      4 * Author: Shannon Zhao <shannon.zhao@linaro.org>
      5 */
      6
      7#include <linux/cpu.h>
      8#include <linux/kvm.h>
      9#include <linux/kvm_host.h>
     10#include <linux/list.h>
     11#include <linux/perf_event.h>
     12#include <linux/perf/arm_pmu.h>
     13#include <linux/uaccess.h>
     14#include <asm/kvm_emulate.h>
     15#include <kvm/arm_pmu.h>
     16#include <kvm/arm_vgic.h>
     17
     18DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
     19
     20static LIST_HEAD(arm_pmus);
     21static DEFINE_MUTEX(arm_pmus_lock);
     22
     23static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
     24static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
     25static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
     26
     27#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
     28
     29static u32 kvm_pmu_event_mask(struct kvm *kvm)
     30{
     31	unsigned int pmuver;
     32
     33	pmuver = kvm->arch.arm_pmu->pmuver;
     34
     35	switch (pmuver) {
     36	case ID_AA64DFR0_PMUVER_8_0:
     37		return GENMASK(9, 0);
     38	case ID_AA64DFR0_PMUVER_8_1:
     39	case ID_AA64DFR0_PMUVER_8_4:
     40	case ID_AA64DFR0_PMUVER_8_5:
     41	case ID_AA64DFR0_PMUVER_8_7:
     42		return GENMASK(15, 0);
     43	default:		/* Shouldn't be here, just for sanity */
     44		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
     45		return 0;
     46	}
     47}
     48
     49/**
     50 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
     51 * @vcpu: The vcpu pointer
     52 * @select_idx: The counter index
     53 */
     54static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
     55{
     56	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
     57		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
     58}
     59
     60static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
     61{
     62	struct kvm_pmu *pmu;
     63	struct kvm_vcpu_arch *vcpu_arch;
     64
     65	pmc -= pmc->idx;
     66	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
     67	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
     68	return container_of(vcpu_arch, struct kvm_vcpu, arch);
     69}
     70
     71/**
     72 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
     73 * @pmc: The PMU counter pointer
     74 */
     75static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
     76{
     77	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
     78
     79	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
     80}
     81
     82/**
     83 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
     84 * @select_idx: The counter index
     85 */
     86static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
     87{
     88	return select_idx & 0x1;
     89}
     90
     91/**
     92 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
     93 * @pmc: The PMU counter pointer
     94 *
     95 * When a pair of PMCs are chained together we use the low counter (canonical)
     96 * to hold the underlying perf event.
     97 */
     98static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
     99{
    100	if (kvm_pmu_pmc_is_chained(pmc) &&
    101	    kvm_pmu_idx_is_high_counter(pmc->idx))
    102		return pmc - 1;
    103
    104	return pmc;
    105}
    106static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
    107{
    108	if (kvm_pmu_idx_is_high_counter(pmc->idx))
    109		return pmc - 1;
    110	else
    111		return pmc + 1;
    112}
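
/*
 * Worked example of the pairing above: counters are chained in even/odd
 * pairs, so for the pair {4, 5} the canonical (low) pmc is index 4 and it
 * owns the single perf event, while index 5 is the high half.
 * kvm_pmu_get_canonical_pmc(&pmc[5]) therefore returns &pmc[4] when bit
 * 2 (5 >> 1) is set in the chained bitmap, and kvm_pmu_get_alternate_pmc()
 * maps 4 <-> 5 unconditionally.
 */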
    113
    114/**
    115 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
    116 * @vcpu: The vcpu pointer
    117 * @select_idx: The counter index
    118 */
    119static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
    120{
    121	u64 eventsel, reg;
    122
    123	select_idx |= 0x1;
    124
    125	if (select_idx == ARMV8_PMU_CYCLE_IDX)
    126		return false;
    127
    128	reg = PMEVTYPER0_EL0 + select_idx;
    129	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
    130
    131	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
    132}
    133
    134/**
    135 * kvm_pmu_get_pair_counter_value - get PMU counter value
    136 * @vcpu: The vcpu pointer
    137 * @pmc: The PMU counter pointer
    138 */
    139static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
    140					  struct kvm_pmc *pmc)
    141{
    142	u64 counter, counter_high, reg, enabled, running;
    143
    144	if (kvm_pmu_pmc_is_chained(pmc)) {
    145		pmc = kvm_pmu_get_canonical_pmc(pmc);
    146		reg = PMEVCNTR0_EL0 + pmc->idx;
    147
    148		counter = __vcpu_sys_reg(vcpu, reg);
    149		counter_high = __vcpu_sys_reg(vcpu, reg + 1);
    150
    151		counter = lower_32_bits(counter) | (counter_high << 32);
    152	} else {
    153		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
    154		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
    155		counter = __vcpu_sys_reg(vcpu, reg);
    156	}
    157
    158	/*
    159	 * The real counter value is equal to the value of counter register plus
    160	 * the value perf event counts.
    161	 */
    162	if (pmc->perf_event)
    163		counter += perf_event_read_value(pmc->perf_event, &enabled,
    164						 &running);
    165
    166	return counter;
    167}
    168
    169/**
    170 * kvm_pmu_get_counter_value - get PMU counter value
    171 * @vcpu: The vcpu pointer
    172 * @select_idx: The counter index
    173 */
    174u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
    175{
    176	u64 counter;
    177	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    178	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
    179
    180	if (!kvm_vcpu_has_pmu(vcpu))
    181		return 0;
    182
    183	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
    184
    185	if (kvm_pmu_pmc_is_chained(pmc) &&
    186	    kvm_pmu_idx_is_high_counter(select_idx))
    187		counter = upper_32_bits(counter);
    188	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
    189		counter = lower_32_bits(counter);
    190
    191	return counter;
    192}
    193
    194/**
    195 * kvm_pmu_set_counter_value - set PMU counter value
    196 * @vcpu: The vcpu pointer
    197 * @select_idx: The counter index
    198 * @val: The counter value
    199 */
    200void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
    201{
    202	u64 reg;
    203
    204	if (!kvm_vcpu_has_pmu(vcpu))
    205		return;
    206
    207	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
    208	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
    209	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
    210
    211	/* Recreate the perf event to reflect the updated sample_period */
    212	kvm_pmu_create_perf_event(vcpu, select_idx);
    213}
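
/*
 * Example of the delta update above: the value the guest observes is the
 * stored shadow register plus whatever the backing perf event has counted
 * so far (see kvm_pmu_get_pair_counter_value()).  If the shadow register
 * holds 100, the perf event has counted 50 and the guest writes 200, the
 * shadow register is adjusted by 200 - 150 and the perf event is then
 * recreated, so the guest reads back 200.
 */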
    214
    215/**
    216 * kvm_pmu_release_perf_event - remove the perf event
    217 * @pmc: The PMU counter pointer
    218 */
    219static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
    220{
    221	pmc = kvm_pmu_get_canonical_pmc(pmc);
    222	if (pmc->perf_event) {
    223		perf_event_disable(pmc->perf_event);
    224		perf_event_release_kernel(pmc->perf_event);
    225		pmc->perf_event = NULL;
    226	}
    227}
    228
    229/**
    230 * kvm_pmu_stop_counter - stop PMU counter
    231 * @pmc: The PMU counter pointer
    232 *
    233 * If this counter has been configured to monitor some event, release it here.
    234 */
    235static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
    236{
    237	u64 counter, reg, val;
    238
    239	pmc = kvm_pmu_get_canonical_pmc(pmc);
    240	if (!pmc->perf_event)
    241		return;
    242
    243	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
    244
    245	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
    246		reg = PMCCNTR_EL0;
    247		val = counter;
    248	} else {
    249		reg = PMEVCNTR0_EL0 + pmc->idx;
    250		val = lower_32_bits(counter);
    251	}
    252
    253	__vcpu_sys_reg(vcpu, reg) = val;
    254
    255	if (kvm_pmu_pmc_is_chained(pmc))
    256		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
    257
    258	kvm_pmu_release_perf_event(pmc);
    259}
    260
    261/**
    262 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
    263 * @vcpu: The vcpu pointer
    264 *
    265 */
    266void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
    267{
    268	int i;
    269	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    270
    271	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
    272		pmu->pmc[i].idx = i;
    273}
    274
    275/**
    276 * kvm_pmu_vcpu_reset - reset pmu state for cpu
    277 * @vcpu: The vcpu pointer
    278 *
    279 */
    280void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
    281{
    282	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
    283	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    284	int i;
    285
    286	for_each_set_bit(i, &mask, 32)
    287		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
    288
    289	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
    290}
    291
    292/**
    293 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
    294 * @vcpu: The vcpu pointer
    295 *
    296 */
    297void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
    298{
    299	int i;
    300	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    301
    302	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
    303		kvm_pmu_release_perf_event(&pmu->pmc[i]);
    304	irq_work_sync(&vcpu->arch.pmu.overflow_work);
    305}
    306
    307u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
    308{
    309	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
    310
    311	val &= ARMV8_PMU_PMCR_N_MASK;
    312	if (val == 0)
    313		return BIT(ARMV8_PMU_CYCLE_IDX);
    314	else
    315		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
    316}
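
/*
 * Example: with PMCR_EL0.N = 6 the guest has event counters 0..5 plus the
 * cycle counter, so this returns GENMASK(5, 0) | BIT(ARMV8_PMU_CYCLE_IDX)
 * = 0x8000003f.  With N = 0 only the cycle counter (bit 31) is reported
 * as valid.
 */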
    317
    318/**
    319 * kvm_pmu_enable_counter_mask - enable selected PMU counters
    320 * @vcpu: The vcpu pointer
    321 * @val: the value guest writes to PMCNTENSET register
    322 *
    323 * Call perf_event_enable to start counting the perf event
    324 */
    325void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
    326{
    327	int i;
    328	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    329	struct kvm_pmc *pmc;
    330
    331	if (!kvm_vcpu_has_pmu(vcpu))
    332		return;
    333
    334	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
    335		return;
    336
    337	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
    338		if (!(val & BIT(i)))
    339			continue;
    340
    341		pmc = &pmu->pmc[i];
    342
    343		/* A change in the enable state may affect the chain state */
    344		kvm_pmu_update_pmc_chained(vcpu, i);
    345		kvm_pmu_create_perf_event(vcpu, i);
    346
    347		/* At this point, pmc must be the canonical */
    348		if (pmc->perf_event) {
    349			perf_event_enable(pmc->perf_event);
    350			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
    351				kvm_debug("fail to enable perf event\n");
    352		}
    353	}
    354}
    355
    356/**
    357 * kvm_pmu_disable_counter_mask - disable selected PMU counters
    358 * @vcpu: The vcpu pointer
    359 * @val: the value guest writes to PMCNTENCLR register
    360 *
    361 * Call perf_event_disable to stop counting the perf event
    362 */
    363void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
    364{
    365	int i;
    366	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    367	struct kvm_pmc *pmc;
    368
    369	if (!kvm_vcpu_has_pmu(vcpu) || !val)
    370		return;
    371
    372	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
    373		if (!(val & BIT(i)))
    374			continue;
    375
    376		pmc = &pmu->pmc[i];
    377
    378		/* A change in the enable state may affect the chain state */
    379		kvm_pmu_update_pmc_chained(vcpu, i);
    380		kvm_pmu_create_perf_event(vcpu, i);
    381
    382		/* At this point, pmc must be the canonical */
    383		if (pmc->perf_event)
    384			perf_event_disable(pmc->perf_event);
    385	}
    386}
    387
    388static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
    389{
    390	u64 reg = 0;
    391
    392	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
    393		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
    394		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
    395		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
    396	}
    397
    398	return reg;
    399}
    400
    401static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
    402{
    403	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    404	bool overflow;
    405
    406	if (!kvm_vcpu_has_pmu(vcpu))
    407		return;
    408
    409	overflow = !!kvm_pmu_overflow_status(vcpu);
    410	if (pmu->irq_level == overflow)
    411		return;
    412
    413	pmu->irq_level = overflow;
    414
    415	if (likely(irqchip_in_kernel(vcpu->kvm))) {
    416		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
    417					      pmu->irq_num, overflow, pmu);
    418		WARN_ON(ret);
    419	}
    420}
    421
    422bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
    423{
    424	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    425	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
    426	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
    427
    428	if (likely(irqchip_in_kernel(vcpu->kvm)))
    429		return false;
    430
    431	return pmu->irq_level != run_level;
    432}
    433
    434/*
    435 * Reflect the PMU overflow interrupt output level into the kvm_run structure
    436 */
    437void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
    438{
    439	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
    440
    441	/* Populate the timer bitmap for user space */
    442	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
    443	if (vcpu->arch.pmu.irq_level)
    444		regs->device_irq_level |= KVM_ARM_DEV_PMU;
    445}
    446
    447/**
    448 * kvm_pmu_flush_hwstate - flush pmu state to cpu
    449 * @vcpu: The vcpu pointer
    450 *
    451 * Check if the PMU has overflowed while we were running in the host, and inject
    452 * an interrupt if that was the case.
    453 */
    454void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
    455{
    456	kvm_pmu_update_state(vcpu);
    457}
    458
    459/**
    460 * kvm_pmu_sync_hwstate - sync pmu state from cpu
    461 * @vcpu: The vcpu pointer
    462 *
    463 * Check if the PMU has overflowed while we were running in the guest, and
    464 * inject an interrupt if that was the case.
    465 */
    466void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
    467{
    468	kvm_pmu_update_state(vcpu);
    469}
    470
    471/**
    472 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
    473 * to the event.
    474 * This is why we need a callback to do it once outside of the NMI context.
    475 */
    476static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
    477{
    478	struct kvm_vcpu *vcpu;
    479	struct kvm_pmu *pmu;
    480
    481	pmu = container_of(work, struct kvm_pmu, overflow_work);
    482	vcpu = kvm_pmc_to_vcpu(pmu->pmc);
    483
    484	kvm_vcpu_kick(vcpu);
    485}
    486
    487/**
    488 * When the perf event overflows, set the overflow status and inform the vcpu.
    489 */
    490static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
    491				  struct perf_sample_data *data,
    492				  struct pt_regs *regs)
    493{
    494	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
    495	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
    496	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
    497	int idx = pmc->idx;
    498	u64 period;
    499
    500	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
    501
    502	/*
    503	 * Reset the sample period to the architectural limit,
    504	 * i.e. the point where the counter overflows.
    505	 */
    506	period = -(local64_read(&perf_event->count));
    507
    508	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
    509		period &= GENMASK(31, 0);
    510
    511	local64_set(&perf_event->hw.period_left, 0);
    512	perf_event->attr.sample_period = period;
    513	perf_event->hw.sample_period = period;
    514
    515	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
    516
    517	if (kvm_pmu_overflow_status(vcpu)) {
    518		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
    519
    520		if (!in_nmi())
    521			kvm_vcpu_kick(vcpu);
    522		else
    523			irq_work_queue(&vcpu->arch.pmu.overflow_work);
    524	}
    525
    526	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
    527}
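
/*
 * Example of the period arithmetic above: if the backing event has counted
 * 0x100000010 in total, the emulated 32-bit counter currently reads 0x10,
 * so the new sample_period becomes (-0x100000010) & GENMASK(31, 0) =
 * 0xfffffff0 and the next overflow fires exactly when the guest-visible
 * counter wraps again.
 */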
    528
    529/**
    530 * kvm_pmu_software_increment - do software increment
    531 * @vcpu: The vcpu pointer
    532 * @val: the value guest writes to PMSWINC register
    533 */
    534void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
    535{
    536	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    537	int i;
    538
    539	if (!kvm_vcpu_has_pmu(vcpu))
    540		return;
    541
    542	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
    543		return;
    544
    545	/* Weed out disabled counters */
    546	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
    547
    548	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
    549		u64 type, reg;
    550
    551		if (!(val & BIT(i)))
    552			continue;
    553
    554		/* PMSWINC only applies to ... SW_INC! */
    555		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
    556		type &= kvm_pmu_event_mask(vcpu->kvm);
    557		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
    558			continue;
    559
    560		/* increment this even SW_INC counter */
    561		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
    562		reg = lower_32_bits(reg);
    563		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
    564
    565		if (reg) /* no overflow on the low part */
    566			continue;
    567
    568		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
    569			/* increment the high counter */
    570			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
    571			reg = lower_32_bits(reg);
    572			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
    573			if (!reg) /* mark overflow on the high counter */
    574				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
    575		} else {
    576			/* mark overflow on low counter */
    577			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
    578		}
    579	}
    580}
    581
    582/**
    583 * kvm_pmu_handle_pmcr - handle PMCR register
    584 * @vcpu: The vcpu pointer
    585 * @val: the value guest writes to PMCR register
    586 */
    587void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
    588{
    589	int i;
    590
    591	if (!kvm_vcpu_has_pmu(vcpu))
    592		return;
    593
    594	if (val & ARMV8_PMU_PMCR_E) {
    595		kvm_pmu_enable_counter_mask(vcpu,
    596		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
    597	} else {
    598		kvm_pmu_disable_counter_mask(vcpu,
    599		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
    600	}
    601
    602	if (val & ARMV8_PMU_PMCR_C)
    603		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
    604
    605	if (val & ARMV8_PMU_PMCR_P) {
    606		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
    607		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
    608		for_each_set_bit(i, &mask, 32)
    609			kvm_pmu_set_counter_value(vcpu, i, 0);
    610	}
    611}
    612
    613static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
    614{
    615	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
    616	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
    617}
    618
    619/**
    620 * kvm_pmu_create_perf_event - create a perf event for a counter
    621 * @vcpu: The vcpu pointer
    622 * @select_idx: The number of selected counter
    623 */
    624static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
    625{
    626	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
    627	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    628	struct kvm_pmc *pmc;
    629	struct perf_event *event;
    630	struct perf_event_attr attr;
    631	u64 eventsel, counter, reg, data;
    632
    633	/*
    634	 * For chained counters the event type and filtering attributes are
    635	 * obtained from the low/even counter. We also use this counter to
    636	 * determine if the event is enabled/disabled.
    637	 */
    638	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
    639
    640	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
    641	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
    642	data = __vcpu_sys_reg(vcpu, reg);
    643
    644	kvm_pmu_stop_counter(vcpu, pmc);
    645	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
    646		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
    647	else
    648		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
    649
    650	/* Software increment event doesn't need to be backed by a perf event */
    651	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
    652		return;
    653
    654	/*
    655	 * If we have a filter in place and that the event isn't allowed, do
    656	 * not install a perf event either.
    657	 */
    658	if (vcpu->kvm->arch.pmu_filter &&
    659	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
    660		return;
    661
    662	memset(&attr, 0, sizeof(struct perf_event_attr));
    663	attr.type = arm_pmu->pmu.type;
    664	attr.size = sizeof(attr);
    665	attr.pinned = 1;
    666	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
    667	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
    668	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
    669	attr.exclude_hv = 1; /* Don't count EL2 events */
    670	attr.exclude_host = 1; /* Don't count host events */
    671	attr.config = eventsel;
    672
    673	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
    674
    675	if (kvm_pmu_pmc_is_chained(pmc)) {
    676		/**
    677		 * The initial sample period (overflow count) of an event. For
    678		 * chained counters we only support overflow interrupts on the
    679		 * high counter.
    680		 */
    681		attr.sample_period = (-counter) & GENMASK(63, 0);
    682		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
    683
    684		event = perf_event_create_kernel_counter(&attr, -1, current,
    685							 kvm_pmu_perf_overflow,
    686							 pmc + 1);
    687	} else {
    688		/* The initial sample period (overflow count) of an event. */
    689		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
    690			attr.sample_period = (-counter) & GENMASK(63, 0);
    691		else
    692			attr.sample_period = (-counter) & GENMASK(31, 0);
    693
    694		event = perf_event_create_kernel_counter(&attr, -1, current,
    695						 kvm_pmu_perf_overflow, pmc);
    696	}
    697
    698	if (IS_ERR(event)) {
    699		pr_err_once("kvm: pmu event creation failed %ld\n",
    700			    PTR_ERR(event));
    701		return;
    702	}
    703
    704	pmc->perf_event = event;
    705}
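
/*
 * Note on the attributes set above: sample_period is the negated emulated
 * counter value, so the host perf event delivers its first overflow exactly
 * when the guest-visible counter would wrap, and kvm_pmu_perf_overflow()
 * then re-arms it.  For chained pairs, the PERF_ATTR_CFG1_KVM_PMU_CHAINED
 * bit in config1 asks the host armv8 PMU driver for a 64-bit (chained)
 * hardware counter, and the high pmc is passed as the overflow context so
 * the overflow is flagged on the odd counter index.
 */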
    706
    707/**
    708 * kvm_pmu_update_pmc_chained - update chained bitmap
    709 * @vcpu: The vcpu pointer
    710 * @select_idx: The number of selected counter
    711 *
    712 * Update the chained bitmap based on the event type written in the
    713 * typer register and the enable state of the odd register.
    714 */
    715static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
    716{
    717	struct kvm_pmu *pmu = &vcpu->arch.pmu;
    718	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
    719	bool new_state, old_state;
    720
    721	old_state = kvm_pmu_pmc_is_chained(pmc);
    722	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
    723		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
    724
    725	if (old_state == new_state)
    726		return;
    727
    728	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
    729	kvm_pmu_stop_counter(vcpu, canonical_pmc);
    730	if (new_state) {
    731		/*
    732		 * During promotion from !chained to chained we must ensure
    733		 * the adjacent counter is stopped and its event destroyed
    734		 */
    735		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
    736		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
    737		return;
    738	}
    739	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
    740}
    741
    742/**
    743 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
    744 * @vcpu: The vcpu pointer
    745 * @data: The data guest writes to PMXEVTYPER_EL0
    746 * @select_idx: The number of selected counter
    747 *
    748 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
    749 * event with given hardware event number. Here we call perf_event API to
    750 * emulate this action and create a kernel perf event for it.
    751 */
    752void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
    753				    u64 select_idx)
    754{
    755	u64 reg, mask;
    756
    757	if (!kvm_vcpu_has_pmu(vcpu))
    758		return;
    759
    760	mask  =  ARMV8_PMU_EVTYPE_MASK;
    761	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
    762	mask |= kvm_pmu_event_mask(vcpu->kvm);
    763
    764	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
    765	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
    766
    767	__vcpu_sys_reg(vcpu, reg) = data & mask;
    768
    769	kvm_pmu_update_pmc_chained(vcpu, select_idx);
    770	kvm_pmu_create_perf_event(vcpu, select_idx);
    771}
    772
    773void kvm_host_pmu_init(struct arm_pmu *pmu)
    774{
    775	struct arm_pmu_entry *entry;
    776
    777	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
    778		return;
    779
    780	mutex_lock(&arm_pmus_lock);
    781
    782	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    783	if (!entry)
    784		goto out_unlock;
    785
    786	entry->arm_pmu = pmu;
    787	list_add_tail(&entry->entry, &arm_pmus);
    788
    789	if (list_is_singular(&arm_pmus))
    790		static_branch_enable(&kvm_arm_pmu_available);
    791
    792out_unlock:
    793	mutex_unlock(&arm_pmus_lock);
    794}
    795
    796static struct arm_pmu *kvm_pmu_probe_armpmu(void)
    797{
    798	struct perf_event_attr attr = { };
    799	struct perf_event *event;
    800	struct arm_pmu *pmu = NULL;
    801
    802	/*
    803	 * Create a dummy event that only counts user cycles. As we'll never
    804	 * leave this function with the event being live, it will never
    805	 * count anything. But it allows us to probe some of the PMU
    806	 * details. Yes, this is terrible.
    807	 */
    808	attr.type = PERF_TYPE_RAW;
    809	attr.size = sizeof(attr);
    810	attr.pinned = 1;
    811	attr.disabled = 0;
    812	attr.exclude_user = 0;
    813	attr.exclude_kernel = 1;
    814	attr.exclude_hv = 1;
    815	attr.exclude_host = 1;
    816	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
    817	attr.sample_period = GENMASK(63, 0);
    818
    819	event = perf_event_create_kernel_counter(&attr, -1, current,
    820						 kvm_pmu_perf_overflow, &attr);
    821
    822	if (IS_ERR(event)) {
    823		pr_err_once("kvm: pmu event creation failed %ld\n",
    824			    PTR_ERR(event));
    825		return NULL;
    826	}
    827
    828	if (event->pmu) {
    829		pmu = to_arm_pmu(event->pmu);
    830		if (pmu->pmuver == 0 ||
    831		    pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
    832			pmu = NULL;
    833	}
    834
    835	perf_event_disable(event);
    836	perf_event_release_kernel(event);
    837
    838	return pmu;
    839}
    840
    841u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
    842{
    843	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
    844	u64 val, mask = 0;
    845	int base, i, nr_events;
    846
    847	if (!kvm_vcpu_has_pmu(vcpu))
    848		return 0;
    849
    850	if (!pmceid1) {
    851		val = read_sysreg(pmceid0_el0);
    852		base = 0;
    853	} else {
    854		val = read_sysreg(pmceid1_el0);
    855		/*
    856		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
    857		 * as RAZ
    858		 */
    859		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
    860			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
    861		base = 32;
    862	}
    863
    864	if (!bmap)
    865		return val;
    866
    867	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
    868
    869	for (i = 0; i < 32; i += 8) {
    870		u64 byte;
    871
    872		byte = bitmap_get_value8(bmap, base + i);
    873		mask |= byte << i;
    874		if (nr_events >= (0x4000 + base + 32)) {
    875			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
    876			mask |= byte << (32 + i);
    877		}
    878	}
    879
    880	return val & mask;
    881}
    882
    883int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
    884{
    885	if (!kvm_vcpu_has_pmu(vcpu))
    886		return 0;
    887
    888	if (!vcpu->arch.pmu.created)
    889		return -EINVAL;
    890
    891	/*
    892	 * A valid interrupt configuration for the PMU is either to have a
    893	 * properly configured interrupt number and using an in-kernel
    894	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
    895	 */
    896	if (irqchip_in_kernel(vcpu->kvm)) {
    897		int irq = vcpu->arch.pmu.irq_num;
    898		/*
    899		 * If we are using an in-kernel vgic, at this point we know
    900		 * the vgic will be initialized, so we can check the PMU irq
    901		 * number against the dimensions of the vgic and make sure
    902		 * it's valid.
    903		 */
    904		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
    905			return -EINVAL;
    906	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
    907		   return -EINVAL;
    908	}
    909
    910	/* One-off reload of the PMU on first run */
    911	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
    912
    913	return 0;
    914}
    915
    916static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
    917{
    918	if (irqchip_in_kernel(vcpu->kvm)) {
    919		int ret;
    920
    921		/*
    922		 * If using the PMU with an in-kernel virtual GIC
    923		 * implementation, we require the GIC to be already
    924		 * initialized when initializing the PMU.
    925		 */
    926		if (!vgic_initialized(vcpu->kvm))
    927			return -ENODEV;
    928
    929		if (!kvm_arm_pmu_irq_initialized(vcpu))
    930			return -ENXIO;
    931
    932		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
    933					 &vcpu->arch.pmu);
    934		if (ret)
    935			return ret;
    936	}
    937
    938	init_irq_work(&vcpu->arch.pmu.overflow_work,
    939		      kvm_pmu_perf_overflow_notify_vcpu);
    940
    941	vcpu->arch.pmu.created = true;
    942	return 0;
    943}
    944
    945/*
    946 * For one VM the interrupt type must be same for each vcpu.
    947 * As a PPI, the interrupt number is the same for all vcpus,
    948 * while as an SPI it must be a separate number per vcpu.
    949 */
    950static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
    951{
    952	unsigned long i;
    953	struct kvm_vcpu *vcpu;
    954
    955	kvm_for_each_vcpu(i, vcpu, kvm) {
    956		if (!kvm_arm_pmu_irq_initialized(vcpu))
    957			continue;
    958
    959		if (irq_is_ppi(irq)) {
    960			if (vcpu->arch.pmu.irq_num != irq)
    961				return false;
    962		} else {
    963			if (vcpu->arch.pmu.irq_num == irq)
    964				return false;
    965		}
    966	}
    967
    968	return true;
    969}
    970
    971static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
    972{
    973	struct kvm *kvm = vcpu->kvm;
    974	struct arm_pmu_entry *entry;
    975	struct arm_pmu *arm_pmu;
    976	int ret = -ENXIO;
    977
    978	mutex_lock(&kvm->lock);
    979	mutex_lock(&arm_pmus_lock);
    980
    981	list_for_each_entry(entry, &arm_pmus, entry) {
    982		arm_pmu = entry->arm_pmu;
    983		if (arm_pmu->pmu.type == pmu_id) {
    984			if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
    985			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
    986				ret = -EBUSY;
    987				break;
    988			}
    989
    990			kvm->arch.arm_pmu = arm_pmu;
    991			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
    992			ret = 0;
    993			break;
    994		}
    995	}
    996
    997	mutex_unlock(&arm_pmus_lock);
    998	mutex_unlock(&kvm->lock);
    999	return ret;
   1000}
   1001
   1002int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
   1003{
   1004	struct kvm *kvm = vcpu->kvm;
   1005
   1006	if (!kvm_vcpu_has_pmu(vcpu))
   1007		return -ENODEV;
   1008
   1009	if (vcpu->arch.pmu.created)
   1010		return -EBUSY;
   1011
   1012	mutex_lock(&kvm->lock);
   1013	if (!kvm->arch.arm_pmu) {
   1014		/* No PMU set, get the default one */
   1015		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
   1016		if (!kvm->arch.arm_pmu) {
   1017			mutex_unlock(&kvm->lock);
   1018			return -ENODEV;
   1019		}
   1020	}
   1021	mutex_unlock(&kvm->lock);
   1022
   1023	switch (attr->attr) {
   1024	case KVM_ARM_VCPU_PMU_V3_IRQ: {
   1025		int __user *uaddr = (int __user *)(long)attr->addr;
   1026		int irq;
   1027
   1028		if (!irqchip_in_kernel(kvm))
   1029			return -EINVAL;
   1030
   1031		if (get_user(irq, uaddr))
   1032			return -EFAULT;
   1033
   1034		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
   1035		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
   1036			return -EINVAL;
   1037
   1038		if (!pmu_irq_is_valid(kvm, irq))
   1039			return -EINVAL;
   1040
   1041		if (kvm_arm_pmu_irq_initialized(vcpu))
   1042			return -EBUSY;
   1043
   1044		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
   1045		vcpu->arch.pmu.irq_num = irq;
   1046		return 0;
   1047	}
   1048	case KVM_ARM_VCPU_PMU_V3_FILTER: {
   1049		struct kvm_pmu_event_filter __user *uaddr;
   1050		struct kvm_pmu_event_filter filter;
   1051		int nr_events;
   1052
   1053		nr_events = kvm_pmu_event_mask(kvm) + 1;
   1054
   1055		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
   1056
   1057		if (copy_from_user(&filter, uaddr, sizeof(filter)))
   1058			return -EFAULT;
   1059
   1060		if (((u32)filter.base_event + filter.nevents) > nr_events ||
   1061		    (filter.action != KVM_PMU_EVENT_ALLOW &&
   1062		     filter.action != KVM_PMU_EVENT_DENY))
   1063			return -EINVAL;
   1064
   1065		mutex_lock(&kvm->lock);
   1066
   1067		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
   1068			mutex_unlock(&kvm->lock);
   1069			return -EBUSY;
   1070		}
   1071
   1072		if (!kvm->arch.pmu_filter) {
   1073			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
   1074			if (!kvm->arch.pmu_filter) {
   1075				mutex_unlock(&kvm->lock);
   1076				return -ENOMEM;
   1077			}
   1078
   1079			/*
   1080			 * The default depends on the first applied filter.
   1081			 * If it allows events, the default is to deny.
   1082			 * Conversely, if the first filter denies a set of
   1083			 * events, the default is to allow.
   1084			 */
   1085			if (filter.action == KVM_PMU_EVENT_ALLOW)
   1086				bitmap_zero(kvm->arch.pmu_filter, nr_events);
   1087			else
   1088				bitmap_fill(kvm->arch.pmu_filter, nr_events);
   1089		}
   1090
   1091		if (filter.action == KVM_PMU_EVENT_ALLOW)
   1092			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
   1093		else
   1094			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
   1095
   1096		mutex_unlock(&kvm->lock);
   1097
   1098		return 0;
   1099	}
   1100	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
   1101		int __user *uaddr = (int __user *)(long)attr->addr;
   1102		int pmu_id;
   1103
   1104		if (get_user(pmu_id, uaddr))
   1105			return -EFAULT;
   1106
   1107		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
   1108	}
   1109	case KVM_ARM_VCPU_PMU_V3_INIT:
   1110		return kvm_arm_pmu_v3_init(vcpu);
   1111	}
   1112
   1113	return -ENXIO;
   1114}
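
/*
 * A minimal userspace sketch of how a VMM is expected to drive the
 * attributes handled above (error handling omitted; assumes vcpu_fd was
 * created with the KVM_ARM_VCPU_PMU_V3 feature and that an in-kernel vGIC
 * has already been initialised; the PPI number is platform policy):
 *
 *     int irq = 23;     // PMU overflow PPI chosen by the VMM
 *     struct kvm_device_attr attr = {
 *         .group = KVM_ARM_VCPU_PMU_V3_CTRL,
 *         .attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
 *         .addr  = (__u64)&irq,
 *     };
 *     ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);  // -> kvm_arm_pmu_v3_set_attr()
 *
 *     attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;        // no payload
 *     attr.addr = 0;
 *     ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);  // -> kvm_arm_pmu_v3_init()
 */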
   1115
   1116int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
   1117{
   1118	switch (attr->attr) {
   1119	case KVM_ARM_VCPU_PMU_V3_IRQ: {
   1120		int __user *uaddr = (int __user *)(long)attr->addr;
   1121		int irq;
   1122
   1123		if (!irqchip_in_kernel(vcpu->kvm))
   1124			return -EINVAL;
   1125
   1126		if (!kvm_vcpu_has_pmu(vcpu))
   1127			return -ENODEV;
   1128
   1129		if (!kvm_arm_pmu_irq_initialized(vcpu))
   1130			return -ENXIO;
   1131
   1132		irq = vcpu->arch.pmu.irq_num;
   1133		return put_user(irq, uaddr);
   1134	}
   1135	}
   1136
   1137	return -ENXIO;
   1138}
   1139
   1140int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
   1141{
   1142	switch (attr->attr) {
   1143	case KVM_ARM_VCPU_PMU_V3_IRQ:
   1144	case KVM_ARM_VCPU_PMU_V3_INIT:
   1145	case KVM_ARM_VCPU_PMU_V3_FILTER:
   1146	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
   1147		if (kvm_vcpu_has_pmu(vcpu))
   1148			return 0;
   1149	}
   1150
   1151	return -ENXIO;
   1152}