cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

lapic.c (79064B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2
      3/*
      4 * Local APIC virtualization
      5 *
      6 * Copyright (C) 2006 Qumranet, Inc.
      7 * Copyright (C) 2007 Novell
      8 * Copyright (C) 2007 Intel
      9 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
     10 *
     11 * Authors:
     12 *   Dor Laor <dor.laor@qumranet.com>
     13 *   Gregory Haskins <ghaskins@novell.com>
     14 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
     15 *
     16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
     17 */
     18
     19#include <linux/kvm_host.h>
     20#include <linux/kvm.h>
     21#include <linux/mm.h>
     22#include <linux/highmem.h>
     23#include <linux/smp.h>
     24#include <linux/hrtimer.h>
     25#include <linux/io.h>
     26#include <linux/export.h>
     27#include <linux/math64.h>
     28#include <linux/slab.h>
     29#include <asm/processor.h>
     30#include <asm/msr.h>
     31#include <asm/page.h>
     32#include <asm/current.h>
     33#include <asm/apicdef.h>
     34#include <asm/delay.h>
     35#include <linux/atomic.h>
     36#include <linux/jump_label.h>
     37#include "kvm_cache_regs.h"
     38#include "irq.h"
     39#include "ioapic.h"
     40#include "trace.h"
     41#include "x86.h"
     42#include "cpuid.h"
     43#include "hyperv.h"
     44
     45#ifndef CONFIG_X86_64
     46#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
     47#else
     48#define mod_64(x, y) ((x) % (y))
     49#endif
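/*
 * Illustrative note: on 32-bit builds the compiler cannot emit a native
 * 64-bit "%" (it would need a libgcc helper the kernel does not provide),
 * so mod_64() is built from div64_u64() instead.  Both forms agree, e.g.:
 *
 *	mod_64(10ULL, 3ULL) == 10 - 3 * div64_u64(10, 3) == 1
 */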
     50
     51#define PRId64 "d"
     52#define PRIx64 "llx"
     53#define PRIu64 "u"
     54#define PRIo64 "o"
     55
      56/* 14 is the version for Xeon and Pentium 8.4.8 */
     57#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
     58#define LAPIC_MMIO_LENGTH		(1 << 12)
      59/* The following defines are not in apicdef.h */
     60#define MAX_APIC_VECTOR			256
     61#define APIC_VECTORS_PER_REG		32
     62
     63static bool lapic_timer_advance_dynamic __read_mostly;
     64#define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
     65#define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
     66#define LAPIC_TIMER_ADVANCE_NS_INIT	1000
     67#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
     68/* step-by-step approximation to mitigate fluctuation */
     69#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
     70
     71static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
     72{
     73	*((u32 *) (regs + reg_off)) = val;
     74}
     75
     76static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
     77{
     78	__kvm_lapic_set_reg(apic->regs, reg_off, val);
     79}
     80
     81static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
     82{
     83	BUILD_BUG_ON(reg != APIC_ICR);
     84	return *((u64 *) (regs + reg));
     85}
     86
     87static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
     88{
     89	return __kvm_lapic_get_reg64(apic->regs, reg);
     90}
     91
     92static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
     93{
     94	BUILD_BUG_ON(reg != APIC_ICR);
     95	*((u64 *) (regs + reg)) = val;
     96}
     97
     98static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
     99						int reg, u64 val)
    100{
    101	__kvm_lapic_set_reg64(apic->regs, reg, val);
    102}
    103
    104static inline int apic_test_vector(int vec, void *bitmap)
    105{
    106	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    107}
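/*
 * Illustration, assuming the usual VEC_POS()/REG_POS() definitions from
 * lapic.h (VEC_POS(v) == (v & 31), REG_POS(v) == ((v >> 5) << 4)): the
 * 256-vector IRR/ISR/TMR "registers" are stored as eight 32-bit words on a
 * 0x10-byte stride.  For vector 0x31 (49), for example:
 *
 *	apic_test_vector(0x31, apic->regs + APIC_IRR)
 *		== test_bit(17, apic->regs + APIC_IRR + 0x10)
 */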
    108
    109bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
    110{
    111	struct kvm_lapic *apic = vcpu->arch.apic;
    112
    113	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
    114		apic_test_vector(vector, apic->regs + APIC_IRR);
    115}
    116
    117static inline int __apic_test_and_set_vector(int vec, void *bitmap)
    118{
    119	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    120}
    121
    122static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
    123{
    124	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
    125}
    126
    127__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
    128__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
    129
    130static inline int apic_enabled(struct kvm_lapic *apic)
    131{
    132	return kvm_apic_sw_enabled(apic) &&	kvm_apic_hw_enabled(apic);
    133}
    134
    135#define LVT_MASK	\
    136	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
    137
    138#define LINT_MASK	\
    139	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
    140	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
    141
    142static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
    143{
    144	return apic->vcpu->vcpu_id;
    145}
    146
    147static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
    148{
    149	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
    150		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
    151}
    152
    153bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
    154{
    155	return kvm_x86_ops.set_hv_timer
    156	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
    157		    kvm_can_post_timer_interrupt(vcpu));
    158}
    159EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
    160
    161static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
    162{
    163	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
    164}
    165
    166static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
    167		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
    168	switch (map->mode) {
    169	case KVM_APIC_MODE_X2APIC: {
    170		u32 offset = (dest_id >> 16) * 16;
    171		u32 max_apic_id = map->max_apic_id;
    172
    173		if (offset <= max_apic_id) {
    174			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
    175
    176			offset = array_index_nospec(offset, map->max_apic_id + 1);
    177			*cluster = &map->phys_map[offset];
    178			*mask = dest_id & (0xffff >> (16 - cluster_size));
    179		} else {
    180			*mask = 0;
    181		}
    182
    183		return true;
    184		}
    185	case KVM_APIC_MODE_XAPIC_FLAT:
    186		*cluster = map->xapic_flat_map;
    187		*mask = dest_id & 0xff;
    188		return true;
    189	case KVM_APIC_MODE_XAPIC_CLUSTER:
    190		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
    191		*mask = dest_id & 0xf;
    192		return true;
    193	default:
    194		/* Not optimized. */
    195		return false;
    196	}
    197}
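/*
 * Worked example for the x2APIC case (illustrative, assuming max_apic_id is
 * large enough for a full cluster): a logical dest_id of 0x00020020 selects
 * cluster 2 (dest_id >> 16), so *cluster points at phys_map[2 * 16] and
 * *mask becomes 0x0020, i.e. bit 5 of that cluster.  This is exactly the LDR
 * of the vCPU with x2APIC ID 0x25 (cluster 2, bit 5).
 */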
    198
    199static void kvm_apic_map_free(struct rcu_head *rcu)
    200{
    201	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
    202
    203	kvfree(map);
    204}
    205
    206/*
    207 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
    208 *
    209 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
    210 * apic_map_lock_held.
    211 */
    212enum {
    213	CLEAN,
    214	UPDATE_IN_PROGRESS,
    215	DIRTY
    216};
    217
    218void kvm_recalculate_apic_map(struct kvm *kvm)
    219{
    220	struct kvm_apic_map *new, *old = NULL;
    221	struct kvm_vcpu *vcpu;
    222	unsigned long i;
    223	u32 max_id = 255; /* enough space for any xAPIC ID */
    224
    225	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
    226	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
    227		return;
    228
    229	WARN_ONCE(!irqchip_in_kernel(kvm),
    230		  "Dirty APIC map without an in-kernel local APIC");
    231
    232	mutex_lock(&kvm->arch.apic_map_lock);
    233	/*
    234	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
    235	 * (if clean) or the APIC registers (if dirty).
    236	 */
    237	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
    238				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
    239		/* Someone else has updated the map. */
    240		mutex_unlock(&kvm->arch.apic_map_lock);
    241		return;
    242	}
    243
    244	kvm_for_each_vcpu(i, vcpu, kvm)
    245		if (kvm_apic_present(vcpu))
    246			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
    247
    248	new = kvzalloc(sizeof(struct kvm_apic_map) +
    249	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
    250			   GFP_KERNEL_ACCOUNT);
    251
    252	if (!new)
    253		goto out;
    254
    255	new->max_apic_id = max_id;
    256
    257	kvm_for_each_vcpu(i, vcpu, kvm) {
    258		struct kvm_lapic *apic = vcpu->arch.apic;
    259		struct kvm_lapic **cluster;
    260		u16 mask;
    261		u32 ldr;
    262		u8 xapic_id;
    263		u32 x2apic_id;
    264
    265		if (!kvm_apic_present(vcpu))
    266			continue;
    267
    268		xapic_id = kvm_xapic_id(apic);
    269		x2apic_id = kvm_x2apic_id(apic);
    270
    271		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
    272		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
    273				x2apic_id <= new->max_apic_id)
    274			new->phys_map[x2apic_id] = apic;
    275		/*
    276		 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
    277		 * prevent them from masking VCPUs with APIC ID <= 0xff.
    278		 */
    279		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
    280			new->phys_map[xapic_id] = apic;
    281
    282		if (!kvm_apic_sw_enabled(apic))
    283			continue;
    284
    285		ldr = kvm_lapic_get_reg(apic, APIC_LDR);
    286
    287		if (apic_x2apic_mode(apic)) {
    288			new->mode |= KVM_APIC_MODE_X2APIC;
    289		} else if (ldr) {
    290			ldr = GET_APIC_LOGICAL_ID(ldr);
    291			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
    292				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
    293			else
    294				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
    295		}
    296
    297		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
    298			continue;
    299
    300		if (mask)
    301			cluster[ffs(mask) - 1] = apic;
    302	}
    303out:
    304	old = rcu_dereference_protected(kvm->arch.apic_map,
    305			lockdep_is_held(&kvm->arch.apic_map_lock));
    306	rcu_assign_pointer(kvm->arch.apic_map, new);
    307	/*
    308	 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
    309	 * If another update has come in, leave it DIRTY.
    310	 */
    311	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
    312			       UPDATE_IN_PROGRESS, CLEAN);
    313	mutex_unlock(&kvm->arch.apic_map_lock);
    314
    315	if (old)
    316		call_rcu(&old->rcu, kvm_apic_map_free);
    317
    318	kvm_make_scan_ioapic_request(kvm);
    319}
    320
    321static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
    322{
    323	bool enabled = val & APIC_SPIV_APIC_ENABLED;
    324
    325	kvm_lapic_set_reg(apic, APIC_SPIV, val);
    326
    327	if (enabled != apic->sw_enabled) {
    328		apic->sw_enabled = enabled;
    329		if (enabled)
    330			static_branch_slow_dec_deferred(&apic_sw_disabled);
    331		else
    332			static_branch_inc(&apic_sw_disabled.key);
    333
    334		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
    335	}
    336
    337	/* Check if there are APF page ready requests pending */
    338	if (enabled)
    339		kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
    340}
    341
    342static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
    343{
    344	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
    345	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
    346}
    347
    348static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
    349{
    350	kvm_lapic_set_reg(apic, APIC_LDR, id);
    351	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
    352}
    353
    354static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
    355{
    356	kvm_lapic_set_reg(apic, APIC_DFR, val);
    357	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
    358}
    359
    360static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
    361{
    362	return ((id >> 4) << 16) | (1 << (id & 0xf));
    363}
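/*
 * Example (illustrative): for x2APIC ID 0x25 the derived LDR is
 *
 *	((0x25 >> 4) << 16) | (1 << (0x25 & 0xf)) == 0x00020020
 *
 * i.e. cluster 2, logical bit 5, matching the decode in
 * kvm_apic_map_get_logical_dest().
 */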
    364
    365static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
    366{
    367	u32 ldr = kvm_apic_calc_x2apic_ldr(id);
    368
    369	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
    370
    371	kvm_lapic_set_reg(apic, APIC_ID, id);
    372	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
    373	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
    374}
    375
    376static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
    377{
    378	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
    379}
    380
    381static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
    382{
    383	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
    384}
    385
    386static inline int apic_lvtt_period(struct kvm_lapic *apic)
    387{
    388	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
    389}
    390
    391static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
    392{
    393	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
    394}
    395
    396static inline int apic_lvt_nmi_mode(u32 lvt_val)
    397{
    398	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
    399}
    400
    401void kvm_apic_set_version(struct kvm_vcpu *vcpu)
    402{
    403	struct kvm_lapic *apic = vcpu->arch.apic;
    404	u32 v = APIC_VERSION;
    405
    406	if (!lapic_in_kernel(vcpu))
    407		return;
    408
    409	/*
     410	 * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which does not
     411	 * have an EOI register. Some buggy OSes (e.g. Windows with the Hyper-V
     412	 * role) disable EOI broadcast in the LAPIC without first checking the
     413	 * IOAPIC version, so level-triggered interrupts never get EOIed in the
     414	 * IOAPIC.
    415	 */
    416	if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
    417	    !ioapic_in_kernel(vcpu->kvm))
    418		v |= APIC_LVR_DIRECTED_EOI;
    419	kvm_lapic_set_reg(apic, APIC_LVR, v);
    420}
    421
    422static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
     423	LVT_MASK,	/* part LVTT mask, timer mode mask added at runtime */
    424	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
    425	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
    426	LINT_MASK, LINT_MASK,	/* LVT0-1 */
    427	LVT_MASK		/* LVTERR */
    428};
    429
    430static int find_highest_vector(void *bitmap)
    431{
    432	int vec;
    433	u32 *reg;
    434
    435	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
    436	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
    437		reg = bitmap + REG_POS(vec);
    438		if (*reg)
    439			return __fls(*reg) + vec;
    440	}
    441
    442	return -1;
    443}
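/*
 * Illustration: the scan starts at the word covering vectors 224-255 and
 * walks down.  If only vector 0x31 (49) is pending, the first non-zero word
 * is the one at REG_POS(32), and the function returns __fls(1 << 17) + 32,
 * i.e. 49.
 */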
    444
    445static u8 count_vectors(void *bitmap)
    446{
    447	int vec;
    448	u32 *reg;
    449	u8 count = 0;
    450
    451	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
    452		reg = bitmap + REG_POS(vec);
    453		count += hweight32(*reg);
    454	}
    455
    456	return count;
    457}
    458
    459bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
    460{
    461	u32 i, vec;
    462	u32 pir_val, irr_val, prev_irr_val;
    463	int max_updated_irr;
    464
    465	max_updated_irr = -1;
    466	*max_irr = -1;
    467
    468	for (i = vec = 0; i <= 7; i++, vec += 32) {
    469		pir_val = READ_ONCE(pir[i]);
    470		irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
    471		if (pir_val) {
    472			prev_irr_val = irr_val;
    473			irr_val |= xchg(&pir[i], 0);
    474			*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
    475			if (prev_irr_val != irr_val) {
    476				max_updated_irr =
    477					__fls(irr_val ^ prev_irr_val) + vec;
    478			}
    479		}
    480		if (irr_val)
    481			*max_irr = __fls(irr_val) + vec;
    482	}
    483
    484	return ((max_updated_irr != -1) &&
    485		(max_updated_irr == *max_irr));
    486}
    487EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
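/*
 * Worked example (illustrative): if pir[1] has bit 17 set (vector 49) while
 * the matching vIRR word is clear, the xchg() moves the bit from the posted-
 * interrupt descriptor into vIRR and max_updated_irr becomes 49.  Provided no
 * higher vector was already pending, *max_irr is also 49 and the function
 * returns true.
 */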
    488
    489bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
    490{
    491	struct kvm_lapic *apic = vcpu->arch.apic;
    492
    493	return __kvm_apic_update_irr(pir, apic->regs, max_irr);
    494}
    495EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
    496
    497static inline int apic_search_irr(struct kvm_lapic *apic)
    498{
    499	return find_highest_vector(apic->regs + APIC_IRR);
    500}
    501
    502static inline int apic_find_highest_irr(struct kvm_lapic *apic)
    503{
    504	int result;
    505
    506	/*
     507	 * Note that irr_pending is just a hint. It will always be
    508	 * true with virtual interrupt delivery enabled.
    509	 */
    510	if (!apic->irr_pending)
    511		return -1;
    512
    513	result = apic_search_irr(apic);
    514	ASSERT(result == -1 || result >= 16);
    515
    516	return result;
    517}
    518
    519static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
    520{
    521	struct kvm_vcpu *vcpu;
    522
    523	vcpu = apic->vcpu;
    524
    525	if (unlikely(vcpu->arch.apicv_active)) {
    526		/* need to update RVI */
    527		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
    528		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
    529	} else {
    530		apic->irr_pending = false;
    531		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
    532		if (apic_search_irr(apic) != -1)
    533			apic->irr_pending = true;
    534	}
    535}
    536
    537void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
    538{
    539	apic_clear_irr(vec, vcpu->arch.apic);
    540}
    541EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
    542
    543static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
    544{
    545	struct kvm_vcpu *vcpu;
    546
    547	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
    548		return;
    549
    550	vcpu = apic->vcpu;
    551
    552	/*
    553	 * With APIC virtualization enabled, all caching is disabled
    554	 * because the processor can modify ISR under the hood.  Instead
    555	 * just set SVI.
    556	 */
    557	if (unlikely(vcpu->arch.apicv_active))
    558		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, vec);
    559	else {
    560		++apic->isr_count;
    561		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
    562		/*
     563		 * An ISR (in-service register) bit is set when an interrupt is
     564		 * injected. Only the highest-priority vector is injected, so the
     565		 * most recently set bit matches the highest bit in the ISR.
    566		 */
    567		apic->highest_isr_cache = vec;
    568	}
    569}
    570
    571static inline int apic_find_highest_isr(struct kvm_lapic *apic)
    572{
    573	int result;
    574
    575	/*
    576	 * Note that isr_count is always 1, and highest_isr_cache
    577	 * is always -1, with APIC virtualization enabled.
    578	 */
    579	if (!apic->isr_count)
    580		return -1;
    581	if (likely(apic->highest_isr_cache != -1))
    582		return apic->highest_isr_cache;
    583
    584	result = find_highest_vector(apic->regs + APIC_ISR);
    585	ASSERT(result == -1 || result >= 16);
    586
    587	return result;
    588}
    589
    590static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
    591{
    592	struct kvm_vcpu *vcpu;
    593	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
    594		return;
    595
    596	vcpu = apic->vcpu;
    597
    598	/*
    599	 * We do get here for APIC virtualization enabled if the guest
    600	 * uses the Hyper-V APIC enlightenment.  In this case we may need
    601	 * to trigger a new interrupt delivery by writing the SVI field;
    602	 * on the other hand isr_count and highest_isr_cache are unused
    603	 * and must be left alone.
    604	 */
    605	if (unlikely(vcpu->arch.apicv_active))
    606		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
    607	else {
    608		--apic->isr_count;
    609		BUG_ON(apic->isr_count < 0);
    610		apic->highest_isr_cache = -1;
    611	}
    612}
    613
    614int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
    615{
     616	/* This may race with setting of irr in __apic_accept_irq() and the
     617	 * value returned may be stale, but kvm_vcpu_kick() in __apic_accept_irq
     618	 * will cause an immediate vmexit and the value will be recalculated
    619	 * on the next vmentry.
    620	 */
    621	return apic_find_highest_irr(vcpu->arch.apic);
    622}
    623EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
    624
    625static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
    626			     int vector, int level, int trig_mode,
    627			     struct dest_map *dest_map);
    628
    629int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
    630		     struct dest_map *dest_map)
    631{
    632	struct kvm_lapic *apic = vcpu->arch.apic;
    633
    634	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
    635			irq->level, irq->trig_mode, dest_map);
    636}
    637
    638static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
    639			 struct kvm_lapic_irq *irq, u32 min)
    640{
    641	int i, count = 0;
    642	struct kvm_vcpu *vcpu;
    643
    644	if (min > map->max_apic_id)
    645		return 0;
    646
    647	for_each_set_bit(i, ipi_bitmap,
    648		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
    649		if (map->phys_map[min + i]) {
    650			vcpu = map->phys_map[min + i]->vcpu;
    651			count += kvm_apic_set_irq(vcpu, irq, NULL);
    652		}
    653	}
    654
    655	return count;
    656}
    657
    658int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
    659		    unsigned long ipi_bitmap_high, u32 min,
    660		    unsigned long icr, int op_64_bit)
    661{
    662	struct kvm_apic_map *map;
    663	struct kvm_lapic_irq irq = {0};
    664	int cluster_size = op_64_bit ? 64 : 32;
    665	int count;
    666
    667	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
    668		return -KVM_EINVAL;
    669
    670	irq.vector = icr & APIC_VECTOR_MASK;
    671	irq.delivery_mode = icr & APIC_MODE_MASK;
    672	irq.level = (icr & APIC_INT_ASSERT) != 0;
    673	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
    674
    675	rcu_read_lock();
    676	map = rcu_dereference(kvm->arch.apic_map);
    677
    678	count = -EOPNOTSUPP;
    679	if (likely(map)) {
    680		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
    681		min += cluster_size;
    682		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
    683	}
    684
    685	rcu_read_unlock();
    686	return count;
    687}
    688
    689static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
    690{
    691
    692	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
    693				      sizeof(val));
    694}
    695
    696static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
    697{
    698
    699	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
    700				      sizeof(*val));
    701}
    702
    703static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
    704{
    705	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
    706}
    707
    708static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
    709{
    710	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
    711		return;
    712
    713	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
    714}
    715
    716static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
    717{
    718	u8 val;
    719
    720	if (pv_eoi_get_user(vcpu, &val) < 0)
    721		return false;
    722
    723	val &= KVM_PV_EOI_ENABLED;
    724
    725	if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
    726		return false;
    727
    728	/*
    729	 * Clear pending bit in any case: it will be set again on vmentry.
     730	 * While this might not be ideal from a performance point of view,
    731	 * this makes sure pv eoi is only enabled when we know it's safe.
    732	 */
    733	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
    734
    735	return val;
    736}
    737
    738static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
    739{
    740	int highest_irr;
    741	if (kvm_x86_ops.sync_pir_to_irr)
    742		highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
    743	else
    744		highest_irr = apic_find_highest_irr(apic);
    745	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
    746		return -1;
    747	return highest_irr;
    748}
    749
    750static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
    751{
    752	u32 tpr, isrv, ppr, old_ppr;
    753	int isr;
    754
    755	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
    756	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
    757	isr = apic_find_highest_isr(apic);
    758	isrv = (isr != -1) ? isr : 0;
    759
    760	if ((tpr & 0xf0) >= (isrv & 0xf0))
    761		ppr = tpr & 0xff;
    762	else
    763		ppr = isrv & 0xf0;
    764
    765	*new_ppr = ppr;
    766	if (old_ppr != ppr)
    767		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
    768
    769	return ppr < old_ppr;
    770}
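/*
 * Worked example (illustrative): with TPR == 0x30 and vector 0x51 in service,
 * (tpr & 0xf0) < (isrv & 0xf0), so PPR becomes 0x50 and only vectors in a
 * higher priority class (0x60 and up) can be delivered.  With TPR == 0x60
 * instead, PPR would be 0x60 (the full TPR, including its low nibble).
 */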
    771
    772static void apic_update_ppr(struct kvm_lapic *apic)
    773{
    774	u32 ppr;
    775
    776	if (__apic_update_ppr(apic, &ppr) &&
    777	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
    778		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
    779}
    780
    781void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
    782{
    783	apic_update_ppr(vcpu->arch.apic);
    784}
    785EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
    786
    787static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
    788{
    789	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
    790	apic_update_ppr(apic);
    791}
    792
    793static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
    794{
    795	return mda == (apic_x2apic_mode(apic) ?
    796			X2APIC_BROADCAST : APIC_BROADCAST);
    797}
    798
    799static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
    800{
    801	if (kvm_apic_broadcast(apic, mda))
    802		return true;
    803
    804	if (apic_x2apic_mode(apic))
    805		return mda == kvm_x2apic_id(apic);
    806
    807	/*
    808	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
    809	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
    810	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
     811	 * The 0xff condition is needed because the xAPIC ID is writeable.
    812	 */
    813	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
    814		return true;
    815
    816	return mda == kvm_xapic_id(apic);
    817}
    818
    819static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
    820{
    821	u32 logical_id;
    822
    823	if (kvm_apic_broadcast(apic, mda))
    824		return true;
    825
    826	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
    827
    828	if (apic_x2apic_mode(apic))
    829		return ((logical_id >> 16) == (mda >> 16))
    830		       && (logical_id & mda & 0xffff) != 0;
    831
    832	logical_id = GET_APIC_LOGICAL_ID(logical_id);
    833
    834	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
    835	case APIC_DFR_FLAT:
    836		return (logical_id & mda) != 0;
    837	case APIC_DFR_CLUSTER:
    838		return ((logical_id >> 4) == (mda >> 4))
    839		       && (logical_id & mda & 0xf) != 0;
    840	default:
    841		return false;
    842	}
    843}
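/*
 * Examples (illustrative): in xAPIC flat mode, a logical ID of 0x04 matches
 * MDA 0x06 because 0x04 & 0x06 != 0.  In cluster mode, logical ID 0x23
 * (cluster 2, CPUs 0-1) matches MDA 0x21 because the cluster fields are equal
 * and 0x3 & 0x1 != 0.  x2APIC applies the same split with a 16-bit cluster
 * field and a 16-bit bitmask.
 */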
    844
    845/* The KVM local APIC implementation has two quirks:
    846 *
    847 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
    848 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
    849 *    KVM doesn't do that aliasing.
    850 *
    851 *  - in-kernel IOAPIC messages have to be delivered directly to
    852 *    x2APIC, because the kernel does not support interrupt remapping.
    853 *    In order to support broadcast without interrupt remapping, x2APIC
    854 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
    855 *    to X2APIC_BROADCAST.
    856 *
    857 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
    858 * important when userspace wants to use x2APIC-format MSIs, because
    859 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
    860 */
    861static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
    862		struct kvm_lapic *source, struct kvm_lapic *target)
    863{
    864	bool ipi = source != NULL;
    865
    866	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
    867	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
    868		return X2APIC_BROADCAST;
    869
    870	return dest_id;
    871}
    872
    873bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
    874			   int shorthand, unsigned int dest, int dest_mode)
    875{
    876	struct kvm_lapic *target = vcpu->arch.apic;
    877	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
    878
    879	ASSERT(target);
    880	switch (shorthand) {
    881	case APIC_DEST_NOSHORT:
    882		if (dest_mode == APIC_DEST_PHYSICAL)
    883			return kvm_apic_match_physical_addr(target, mda);
    884		else
    885			return kvm_apic_match_logical_addr(target, mda);
    886	case APIC_DEST_SELF:
    887		return target == source;
    888	case APIC_DEST_ALLINC:
    889		return true;
    890	case APIC_DEST_ALLBUT:
    891		return target != source;
    892	default:
    893		return false;
    894	}
    895}
    896EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
    897
    898int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
    899		       const unsigned long *bitmap, u32 bitmap_size)
    900{
    901	u32 mod;
    902	int i, idx = -1;
    903
    904	mod = vector % dest_vcpus;
    905
    906	for (i = 0; i <= mod; i++) {
    907		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
    908		BUG_ON(idx == bitmap_size);
    909	}
    910
    911	return idx;
    912}
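/*
 * Worked example (illustrative): with vector 97 and three destination vCPUs,
 * mod == 97 % 3 == 1, so the loop runs twice and returns the index of the
 * second set bit in *bitmap; the hash simply spreads vectors round-robin
 * across the lowest-priority candidates.
 */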
    913
    914static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
    915{
    916	if (!kvm->arch.disabled_lapic_found) {
    917		kvm->arch.disabled_lapic_found = true;
    918		printk(KERN_INFO
    919		       "Disabled LAPIC found during irq injection\n");
    920	}
    921}
    922
    923static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
    924		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
    925{
    926	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
    927		if ((irq->dest_id == APIC_BROADCAST &&
    928				map->mode != KVM_APIC_MODE_X2APIC))
    929			return true;
    930		if (irq->dest_id == X2APIC_BROADCAST)
    931			return true;
    932	} else {
    933		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
    934		if (irq->dest_id == (x2apic_ipi ?
    935		                     X2APIC_BROADCAST : APIC_BROADCAST))
    936			return true;
    937	}
    938
    939	return false;
    940}
    941
     942/* Return true if the interrupt can be handled by using *bitmap as an index
     943 * mask for valid destinations in the *dst array.
    944 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
    945 * Note: we may have zero kvm_lapic destinations when we return true, which
    946 * means that the interrupt should be dropped.  In this case, *bitmap would be
    947 * zero and *dst undefined.
    948 */
    949static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
    950		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
    951		struct kvm_apic_map *map, struct kvm_lapic ***dst,
    952		unsigned long *bitmap)
    953{
    954	int i, lowest;
    955
    956	if (irq->shorthand == APIC_DEST_SELF && src) {
    957		*dst = src;
    958		*bitmap = 1;
    959		return true;
    960	} else if (irq->shorthand)
    961		return false;
    962
    963	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
    964		return false;
    965
    966	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
    967		if (irq->dest_id > map->max_apic_id) {
    968			*bitmap = 0;
    969		} else {
    970			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
    971			*dst = &map->phys_map[dest_id];
    972			*bitmap = 1;
    973		}
    974		return true;
    975	}
    976
    977	*bitmap = 0;
    978	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
    979				(u16 *)bitmap))
    980		return false;
    981
    982	if (!kvm_lowest_prio_delivery(irq))
    983		return true;
    984
    985	if (!kvm_vector_hashing_enabled()) {
    986		lowest = -1;
    987		for_each_set_bit(i, bitmap, 16) {
    988			if (!(*dst)[i])
    989				continue;
    990			if (lowest < 0)
    991				lowest = i;
    992			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
    993						(*dst)[lowest]->vcpu) < 0)
    994				lowest = i;
    995		}
    996	} else {
    997		if (!*bitmap)
    998			return true;
    999
   1000		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
   1001				bitmap, 16);
   1002
   1003		if (!(*dst)[lowest]) {
   1004			kvm_apic_disabled_lapic_found(kvm);
   1005			*bitmap = 0;
   1006			return true;
   1007		}
   1008	}
   1009
   1010	*bitmap = (lowest >= 0) ? 1 << lowest : 0;
   1011
   1012	return true;
   1013}
   1014
   1015bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
   1016		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
   1017{
   1018	struct kvm_apic_map *map;
   1019	unsigned long bitmap;
   1020	struct kvm_lapic **dst = NULL;
   1021	int i;
   1022	bool ret;
   1023
   1024	*r = -1;
   1025
   1026	if (irq->shorthand == APIC_DEST_SELF) {
   1027		if (KVM_BUG_ON(!src, kvm)) {
   1028			*r = 0;
   1029			return true;
   1030		}
   1031		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
   1032		return true;
   1033	}
   1034
   1035	rcu_read_lock();
   1036	map = rcu_dereference(kvm->arch.apic_map);
   1037
   1038	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
   1039	if (ret) {
   1040		*r = 0;
   1041		for_each_set_bit(i, &bitmap, 16) {
   1042			if (!dst[i])
   1043				continue;
   1044			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
   1045		}
   1046	}
   1047
   1048	rcu_read_unlock();
   1049	return ret;
   1050}
   1051
   1052/*
    1053 * This routine tries to handle interrupts in posted mode; here is how
    1054 * it deals with the different cases:
   1055 * - For single-destination interrupts, handle it in posted mode
   1056 * - Else if vector hashing is enabled and it is a lowest-priority
   1057 *   interrupt, handle it in posted mode and use the following mechanism
   1058 *   to find the destination vCPU.
   1059 *	1. For lowest-priority interrupts, store all the possible
   1060 *	   destination vCPUs in an array.
   1061 *	2. Use "guest vector % max number of destination vCPUs" to find
   1062 *	   the right destination vCPU in the array for the lowest-priority
   1063 *	   interrupt.
   1064 * - Otherwise, use remapped mode to inject the interrupt.
   1065 */
   1066bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
   1067			struct kvm_vcpu **dest_vcpu)
   1068{
   1069	struct kvm_apic_map *map;
   1070	unsigned long bitmap;
   1071	struct kvm_lapic **dst = NULL;
   1072	bool ret = false;
   1073
   1074	if (irq->shorthand)
   1075		return false;
   1076
   1077	rcu_read_lock();
   1078	map = rcu_dereference(kvm->arch.apic_map);
   1079
   1080	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
   1081			hweight16(bitmap) == 1) {
   1082		unsigned long i = find_first_bit(&bitmap, 16);
   1083
   1084		if (dst[i]) {
   1085			*dest_vcpu = dst[i]->vcpu;
   1086			ret = true;
   1087		}
   1088	}
   1089
   1090	rcu_read_unlock();
   1091	return ret;
   1092}
   1093
   1094/*
   1095 * Add a pending IRQ into lapic.
   1096 * Return 1 if successfully added and 0 if discarded.
   1097 */
   1098static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
   1099			     int vector, int level, int trig_mode,
   1100			     struct dest_map *dest_map)
   1101{
   1102	int result = 0;
   1103	struct kvm_vcpu *vcpu = apic->vcpu;
   1104
   1105	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
   1106				  trig_mode, vector);
   1107	switch (delivery_mode) {
   1108	case APIC_DM_LOWEST:
   1109		vcpu->arch.apic_arb_prio++;
   1110		fallthrough;
   1111	case APIC_DM_FIXED:
   1112		if (unlikely(trig_mode && !level))
   1113			break;
   1114
   1115		/* FIXME add logic for vcpu on reset */
   1116		if (unlikely(!apic_enabled(apic)))
   1117			break;
   1118
   1119		result = 1;
   1120
   1121		if (dest_map) {
   1122			__set_bit(vcpu->vcpu_id, dest_map->map);
   1123			dest_map->vectors[vcpu->vcpu_id] = vector;
   1124		}
   1125
   1126		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
   1127			if (trig_mode)
   1128				kvm_lapic_set_vector(vector,
   1129						     apic->regs + APIC_TMR);
   1130			else
   1131				kvm_lapic_clear_vector(vector,
   1132						       apic->regs + APIC_TMR);
   1133		}
   1134
   1135		static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
   1136						       trig_mode, vector);
   1137		break;
   1138
   1139	case APIC_DM_REMRD:
   1140		result = 1;
   1141		vcpu->arch.pv.pv_unhalted = 1;
   1142		kvm_make_request(KVM_REQ_EVENT, vcpu);
   1143		kvm_vcpu_kick(vcpu);
   1144		break;
   1145
   1146	case APIC_DM_SMI:
   1147		result = 1;
   1148		kvm_make_request(KVM_REQ_SMI, vcpu);
   1149		kvm_vcpu_kick(vcpu);
   1150		break;
   1151
   1152	case APIC_DM_NMI:
   1153		result = 1;
   1154		kvm_inject_nmi(vcpu);
   1155		kvm_vcpu_kick(vcpu);
   1156		break;
   1157
   1158	case APIC_DM_INIT:
   1159		if (!trig_mode || level) {
   1160			result = 1;
   1161			/* assumes that there are only KVM_APIC_INIT/SIPI */
   1162			apic->pending_events = (1UL << KVM_APIC_INIT);
   1163			kvm_make_request(KVM_REQ_EVENT, vcpu);
   1164			kvm_vcpu_kick(vcpu);
   1165		}
   1166		break;
   1167
   1168	case APIC_DM_STARTUP:
   1169		result = 1;
   1170		apic->sipi_vector = vector;
   1171		/* make sure sipi_vector is visible for the receiver */
   1172		smp_wmb();
   1173		set_bit(KVM_APIC_SIPI, &apic->pending_events);
   1174		kvm_make_request(KVM_REQ_EVENT, vcpu);
   1175		kvm_vcpu_kick(vcpu);
   1176		break;
   1177
   1178	case APIC_DM_EXTINT:
   1179		/*
   1180		 * Should only be called by kvm_apic_local_deliver() with LVT0,
   1181		 * before NMI watchdog was enabled. Already handled by
   1182		 * kvm_apic_accept_pic_intr().
   1183		 */
   1184		break;
   1185
   1186	default:
   1187		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
   1188		       delivery_mode);
   1189		break;
   1190	}
   1191	return result;
   1192}
   1193
   1194/*
    1195 * This routine identifies the mask of destination vcpus meant to receive
    1196 * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
    1197 * the destination vcpu array and set the bitmap, or it traverses each
    1198 * available vcpu to do the same.
   1199 */
   1200void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
   1201			      unsigned long *vcpu_bitmap)
   1202{
   1203	struct kvm_lapic **dest_vcpu = NULL;
   1204	struct kvm_lapic *src = NULL;
   1205	struct kvm_apic_map *map;
   1206	struct kvm_vcpu *vcpu;
   1207	unsigned long bitmap, i;
   1208	int vcpu_idx;
   1209	bool ret;
   1210
   1211	rcu_read_lock();
   1212	map = rcu_dereference(kvm->arch.apic_map);
   1213
   1214	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
   1215					  &bitmap);
   1216	if (ret) {
   1217		for_each_set_bit(i, &bitmap, 16) {
   1218			if (!dest_vcpu[i])
   1219				continue;
   1220			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
   1221			__set_bit(vcpu_idx, vcpu_bitmap);
   1222		}
   1223	} else {
   1224		kvm_for_each_vcpu(i, vcpu, kvm) {
   1225			if (!kvm_apic_present(vcpu))
   1226				continue;
   1227			if (!kvm_apic_match_dest(vcpu, NULL,
   1228						 irq->shorthand,
   1229						 irq->dest_id,
   1230						 irq->dest_mode))
   1231				continue;
   1232			__set_bit(i, vcpu_bitmap);
   1233		}
   1234	}
   1235	rcu_read_unlock();
   1236}
   1237
   1238int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
   1239{
   1240	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
   1241}
   1242
   1243static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
   1244{
   1245	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
   1246}
   1247
   1248static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
   1249{
   1250	int trigger_mode;
   1251
   1252	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
   1253	if (!kvm_ioapic_handles_vector(apic, vector))
   1254		return;
   1255
   1256	/* Request a KVM exit to inform the userspace IOAPIC. */
   1257	if (irqchip_split(apic->vcpu->kvm)) {
   1258		apic->vcpu->arch.pending_ioapic_eoi = vector;
   1259		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
   1260		return;
   1261	}
   1262
   1263	if (apic_test_vector(vector, apic->regs + APIC_TMR))
   1264		trigger_mode = IOAPIC_LEVEL_TRIG;
   1265	else
   1266		trigger_mode = IOAPIC_EDGE_TRIG;
   1267
   1268	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
   1269}
   1270
   1271static int apic_set_eoi(struct kvm_lapic *apic)
   1272{
   1273	int vector = apic_find_highest_isr(apic);
   1274
   1275	trace_kvm_eoi(apic, vector);
   1276
   1277	/*
    1278	 * Not every EOI write has a corresponding ISR bit; one example is
    1279	 * when the kernel checks the timer in setup_IO_APIC.
   1280	 */
   1281	if (vector == -1)
   1282		return vector;
   1283
   1284	apic_clear_isr(vector, apic);
   1285	apic_update_ppr(apic);
   1286
   1287	if (to_hv_vcpu(apic->vcpu) &&
   1288	    test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
   1289		kvm_hv_synic_send_eoi(apic->vcpu, vector);
   1290
   1291	kvm_ioapic_send_eoi(apic, vector);
   1292	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
   1293	return vector;
   1294}
   1295
   1296/*
   1297 * this interface assumes a trap-like exit, which has already finished
   1298 * desired side effect including vISR and vPPR update.
   1299 */
   1300void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
   1301{
   1302	struct kvm_lapic *apic = vcpu->arch.apic;
   1303
   1304	trace_kvm_eoi(apic, vector);
   1305
   1306	kvm_ioapic_send_eoi(apic, vector);
   1307	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
   1308}
   1309EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
   1310
   1311void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
   1312{
   1313	struct kvm_lapic_irq irq;
   1314
   1315	/* KVM has no delay and should always clear the BUSY/PENDING flag. */
   1316	WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
   1317
   1318	irq.vector = icr_low & APIC_VECTOR_MASK;
   1319	irq.delivery_mode = icr_low & APIC_MODE_MASK;
   1320	irq.dest_mode = icr_low & APIC_DEST_MASK;
   1321	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
   1322	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
   1323	irq.shorthand = icr_low & APIC_SHORT_MASK;
   1324	irq.msi_redir_hint = false;
   1325	if (apic_x2apic_mode(apic))
   1326		irq.dest_id = icr_high;
   1327	else
   1328		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
   1329
   1330	trace_kvm_apic_ipi(icr_low, irq.dest_id);
   1331
   1332	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
   1333}
   1334EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
   1335
   1336static u32 apic_get_tmcct(struct kvm_lapic *apic)
   1337{
   1338	ktime_t remaining, now;
   1339	s64 ns;
   1340	u32 tmcct;
   1341
   1342	ASSERT(apic != NULL);
   1343
   1344	/* if initial count is 0, current count should also be 0 */
   1345	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
   1346		apic->lapic_timer.period == 0)
   1347		return 0;
   1348
   1349	now = ktime_get();
   1350	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
   1351	if (ktime_to_ns(remaining) < 0)
   1352		remaining = 0;
   1353
   1354	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
   1355	tmcct = div64_u64(ns,
   1356			 (APIC_BUS_CYCLE_NS * apic->divide_count));
   1357
   1358	return tmcct;
   1359}
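/*
 * Worked example (illustrative, assuming APIC_BUS_CYCLE_NS == 1 as defined in
 * lapic.h): with TMICT == 100000 and divide_count == 16 the period is
 * 1600000 ns.  If 800000 ns remain until target_expiration, the guest reads
 * back TMCCT == 800000 / 16 == 50000.
 */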
   1360
   1361static void __report_tpr_access(struct kvm_lapic *apic, bool write)
   1362{
   1363	struct kvm_vcpu *vcpu = apic->vcpu;
   1364	struct kvm_run *run = vcpu->run;
   1365
   1366	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
   1367	run->tpr_access.rip = kvm_rip_read(vcpu);
   1368	run->tpr_access.is_write = write;
   1369}
   1370
   1371static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
   1372{
   1373	if (apic->vcpu->arch.tpr_access_reporting)
   1374		__report_tpr_access(apic, write);
   1375}
   1376
   1377static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
   1378{
   1379	u32 val = 0;
   1380
   1381	if (offset >= LAPIC_MMIO_LENGTH)
   1382		return 0;
   1383
   1384	switch (offset) {
   1385	case APIC_ARBPRI:
   1386		break;
   1387
   1388	case APIC_TMCCT:	/* Timer CCR */
   1389		if (apic_lvtt_tscdeadline(apic))
   1390			return 0;
   1391
   1392		val = apic_get_tmcct(apic);
   1393		break;
   1394	case APIC_PROCPRI:
   1395		apic_update_ppr(apic);
   1396		val = kvm_lapic_get_reg(apic, offset);
   1397		break;
   1398	case APIC_TASKPRI:
   1399		report_tpr_access(apic, false);
   1400		fallthrough;
   1401	default:
   1402		val = kvm_lapic_get_reg(apic, offset);
   1403		break;
   1404	}
   1405
   1406	return val;
   1407}
   1408
   1409static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
   1410{
   1411	return container_of(dev, struct kvm_lapic, dev);
   1412}
   1413
   1414#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
   1415#define APIC_REGS_MASK(first, count) \
   1416	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
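/*
 * Examples (illustrative): every xAPIC register offset is a multiple of 0x10,
 * so (reg >> 4) indexes one bit of a 64-bit validity mask:
 *
 *	APIC_REG_MASK(APIC_TASKPRI)  == 1ull << 8   (offset 0x80)
 *	APIC_REGS_MASK(APIC_ISR, 8)  == bits 16-23  (ISR words 0x100-0x170)
 */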
   1417
   1418static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
   1419			      void *data)
   1420{
   1421	unsigned char alignment = offset & 0xf;
   1422	u32 result;
   1423	/* this bitmask has a bit cleared for each reserved register */
   1424	u64 valid_reg_mask =
   1425		APIC_REG_MASK(APIC_ID) |
   1426		APIC_REG_MASK(APIC_LVR) |
   1427		APIC_REG_MASK(APIC_TASKPRI) |
   1428		APIC_REG_MASK(APIC_PROCPRI) |
   1429		APIC_REG_MASK(APIC_LDR) |
   1430		APIC_REG_MASK(APIC_DFR) |
   1431		APIC_REG_MASK(APIC_SPIV) |
   1432		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
   1433		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
   1434		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
   1435		APIC_REG_MASK(APIC_ESR) |
   1436		APIC_REG_MASK(APIC_ICR) |
   1437		APIC_REG_MASK(APIC_LVTT) |
   1438		APIC_REG_MASK(APIC_LVTTHMR) |
   1439		APIC_REG_MASK(APIC_LVTPC) |
   1440		APIC_REG_MASK(APIC_LVT0) |
   1441		APIC_REG_MASK(APIC_LVT1) |
   1442		APIC_REG_MASK(APIC_LVTERR) |
   1443		APIC_REG_MASK(APIC_TMICT) |
   1444		APIC_REG_MASK(APIC_TMCCT) |
   1445		APIC_REG_MASK(APIC_TDCR);
   1446
   1447	/*
   1448	 * ARBPRI and ICR2 are not valid in x2APIC mode.  WARN if KVM reads ICR
   1449	 * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
   1450	 * manually handled by the caller.
   1451	 */
   1452	if (!apic_x2apic_mode(apic))
   1453		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
   1454				  APIC_REG_MASK(APIC_ICR2);
   1455	else
   1456		WARN_ON_ONCE(offset == APIC_ICR);
   1457
   1458	if (alignment + len > 4)
   1459		return 1;
   1460
   1461	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
   1462		return 1;
   1463
   1464	result = __apic_read(apic, offset & ~0xf);
   1465
   1466	trace_kvm_apic_read(offset, result);
   1467
   1468	switch (len) {
   1469	case 1:
   1470	case 2:
   1471	case 4:
   1472		memcpy(data, (char *)&result + alignment, len);
   1473		break;
   1474	default:
   1475		printk(KERN_ERR "Local APIC read with len = %x, "
   1476		       "should be 1,2, or 4 instead\n", len);
   1477		break;
   1478	}
   1479	return 0;
   1480}
   1481
   1482static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
   1483{
   1484	return addr >= apic->base_address &&
   1485		addr < apic->base_address + LAPIC_MMIO_LENGTH;
   1486}
   1487
   1488static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
   1489			   gpa_t address, int len, void *data)
   1490{
   1491	struct kvm_lapic *apic = to_lapic(this);
   1492	u32 offset = address - apic->base_address;
   1493
   1494	if (!apic_mmio_in_range(apic, address))
   1495		return -EOPNOTSUPP;
   1496
   1497	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
   1498		if (!kvm_check_has_quirk(vcpu->kvm,
   1499					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
   1500			return -EOPNOTSUPP;
   1501
   1502		memset(data, 0xff, len);
   1503		return 0;
   1504	}
   1505
   1506	kvm_lapic_reg_read(apic, offset, len, data);
   1507
   1508	return 0;
   1509}
   1510
   1511static void update_divide_count(struct kvm_lapic *apic)
   1512{
   1513	u32 tmp1, tmp2, tdcr;
   1514
   1515	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
   1516	tmp1 = tdcr & 0xf;
   1517	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
   1518	apic->divide_count = 0x1 << (tmp2 & 0x7);
   1519}
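/*
 * Worked examples (illustrative): the TDCR divide configuration lives in bits
 * 0, 1 and 3.  tdcr == 0x0 gives tmp2 == 1 and divide_count == 2 (divide by
 * 2); tdcr == 0xb gives tmp2 == 8, which the "& 0x7" wraps to divide_count ==
 * 1 (divide by 1), matching the SDM encoding.
 */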
   1520
   1521static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
   1522{
   1523	/*
    1524	 * Do not allow the guest to program periodic timers with a small
    1525	 * interval, since the hrtimers are not throttled by the host
   1526	 * scheduler.
   1527	 */
   1528	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
   1529		s64 min_period = min_timer_period_us * 1000LL;
   1530
   1531		if (apic->lapic_timer.period < min_period) {
   1532			pr_info_ratelimited(
   1533			    "kvm: vcpu %i: requested %lld ns "
   1534			    "lapic timer period limited to %lld ns\n",
   1535			    apic->vcpu->vcpu_id,
   1536			    apic->lapic_timer.period, min_period);
   1537			apic->lapic_timer.period = min_period;
   1538		}
   1539	}
   1540}
   1541
   1542static void cancel_hv_timer(struct kvm_lapic *apic);
   1543
   1544static void cancel_apic_timer(struct kvm_lapic *apic)
   1545{
   1546	hrtimer_cancel(&apic->lapic_timer.timer);
   1547	preempt_disable();
   1548	if (apic->lapic_timer.hv_timer_in_use)
   1549		cancel_hv_timer(apic);
   1550	preempt_enable();
   1551	atomic_set(&apic->lapic_timer.pending, 0);
   1552}
   1553
   1554static void apic_update_lvtt(struct kvm_lapic *apic)
   1555{
   1556	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
   1557			apic->lapic_timer.timer_mode_mask;
   1558
   1559	if (apic->lapic_timer.timer_mode != timer_mode) {
   1560		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
   1561				APIC_LVT_TIMER_TSCDEADLINE)) {
   1562			cancel_apic_timer(apic);
   1563			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
   1564			apic->lapic_timer.period = 0;
   1565			apic->lapic_timer.tscdeadline = 0;
   1566		}
   1567		apic->lapic_timer.timer_mode = timer_mode;
   1568		limit_periodic_timer_frequency(apic);
   1569	}
   1570}
   1571
   1572/*
   1573 * On APICv, this test will cause a busy wait
   1574 * during a higher-priority task.
   1575 */
   1576
   1577static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
   1578{
   1579	struct kvm_lapic *apic = vcpu->arch.apic;
   1580	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
   1581
   1582	if (kvm_apic_hw_enabled(apic)) {
   1583		int vec = reg & APIC_VECTOR_MASK;
   1584		void *bitmap = apic->regs + APIC_ISR;
   1585
   1586		if (vcpu->arch.apicv_active)
   1587			bitmap = apic->regs + APIC_IRR;
   1588
   1589		if (apic_test_vector(vec, bitmap))
   1590			return true;
   1591	}
   1592	return false;
   1593}
   1594
   1595static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
   1596{
   1597	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
   1598
   1599	/*
   1600	 * If the guest TSC is running at a different ratio than the host, then
   1601	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
   1602	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
   1603	 * always for VMX enabled hardware.
   1604	 */
   1605	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
   1606		__delay(min(guest_cycles,
   1607			nsec_to_cycles(vcpu, timer_advance_ns)));
   1608	} else {
   1609		u64 delay_ns = guest_cycles * 1000000ULL;
   1610		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
   1611		ndelay(min_t(u32, delay_ns, timer_advance_ns));
   1612	}
   1613}
   1614
   1615static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
   1616					      s64 advance_expire_delta)
   1617{
   1618	struct kvm_lapic *apic = vcpu->arch.apic;
   1619	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
   1620	u64 ns;
   1621
   1622	/* Do not adjust for tiny fluctuations or large random spikes. */
   1623	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
   1624	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
   1625		return;
   1626
   1627	/* too early */
   1628	if (advance_expire_delta < 0) {
   1629		ns = -advance_expire_delta * 1000000ULL;
   1630		do_div(ns, vcpu->arch.virtual_tsc_khz);
   1631		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
   1632	} else {
   1633	/* too late */
   1634		ns = advance_expire_delta * 1000000ULL;
   1635		do_div(ns, vcpu->arch.virtual_tsc_khz);
   1636		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
   1637	}
   1638
   1639	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
   1640		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
   1641	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
   1642}
   1643
   1644static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
   1645{
   1646	struct kvm_lapic *apic = vcpu->arch.apic;
   1647	u64 guest_tsc, tsc_deadline;
   1648
   1649	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
   1650	apic->lapic_timer.expired_tscdeadline = 0;
   1651	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
   1652	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
   1653
   1654	if (lapic_timer_advance_dynamic) {
   1655		adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
   1656		/*
   1657		 * If the timer fired early, reread the TSC to account for the
   1658		 * overhead of the above adjustment to avoid waiting longer
   1659		 * than is necessary.
   1660		 */
   1661		if (guest_tsc < tsc_deadline)
   1662			guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
   1663	}
   1664
   1665	if (guest_tsc < tsc_deadline)
   1666		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
   1667}
   1668
   1669void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
   1670{
   1671	if (lapic_in_kernel(vcpu) &&
   1672	    vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
   1673	    vcpu->arch.apic->lapic_timer.timer_advance_ns &&
   1674	    lapic_timer_int_injected(vcpu))
   1675		__kvm_wait_lapic_expire(vcpu);
   1676}
   1677EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
   1678
   1679static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
   1680{
   1681	struct kvm_timer *ktimer = &apic->lapic_timer;
   1682
   1683	kvm_apic_local_deliver(apic, APIC_LVTT);
   1684	if (apic_lvtt_tscdeadline(apic)) {
   1685		ktimer->tscdeadline = 0;
   1686	} else if (apic_lvtt_oneshot(apic)) {
   1687		ktimer->tscdeadline = 0;
   1688		ktimer->target_expiration = 0;
   1689	}
   1690}
   1691
   1692static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
   1693{
   1694	struct kvm_vcpu *vcpu = apic->vcpu;
   1695	struct kvm_timer *ktimer = &apic->lapic_timer;
   1696
   1697	if (atomic_read(&apic->lapic_timer.pending))
   1698		return;
   1699
   1700	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
   1701		ktimer->expired_tscdeadline = ktimer->tscdeadline;
   1702
   1703	if (!from_timer_fn && vcpu->arch.apicv_active) {
   1704		WARN_ON(kvm_get_running_vcpu() != vcpu);
   1705		kvm_apic_inject_pending_timer_irqs(apic);
   1706		return;
   1707	}
   1708
   1709	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
   1710		/*
   1711		 * Ensure the guest's timer has truly expired before posting an
   1712		 * interrupt.  Open code the relevant checks to avoid querying
   1713		 * lapic_timer_int_injected(), which will be false since the
   1714		 * interrupt isn't yet injected.  Waiting until after injecting
   1715		 * is not an option since that won't help a posted interrupt.
   1716		 */
   1717		if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
   1718		    vcpu->arch.apic->lapic_timer.timer_advance_ns)
   1719			__kvm_wait_lapic_expire(vcpu);
   1720		kvm_apic_inject_pending_timer_irqs(apic);
   1721		return;
   1722	}
   1723
   1724	atomic_inc(&apic->lapic_timer.pending);
   1725	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
   1726	if (from_timer_fn)
   1727		kvm_vcpu_kick(vcpu);
   1728}
   1729
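        /*
         * Arm the hrtimer for TSC-deadline mode: convert the remaining guest
         * TSC cycles to nanoseconds and program the timer timer_advance_ns
         * early; if the deadline has already passed (or falls within the
         * advance window), expire immediately.
         */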
   1730static void start_sw_tscdeadline(struct kvm_lapic *apic)
   1731{
   1732	struct kvm_timer *ktimer = &apic->lapic_timer;
   1733	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
   1734	u64 ns = 0;
   1735	ktime_t expire;
   1736	struct kvm_vcpu *vcpu = apic->vcpu;
   1737	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
   1738	unsigned long flags;
   1739	ktime_t now;
   1740
   1741	if (unlikely(!tscdeadline || !this_tsc_khz))
   1742		return;
   1743
   1744	local_irq_save(flags);
   1745
   1746	now = ktime_get();
   1747	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
   1748
   1749	ns = (tscdeadline - guest_tsc) * 1000000ULL;
   1750	do_div(ns, this_tsc_khz);
   1751
   1752	if (likely(tscdeadline > guest_tsc) &&
   1753	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
   1754		expire = ktime_add_ns(now, ns);
   1755		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
   1756		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
   1757	} else
   1758		apic_timer_expired(apic, false);
   1759
   1760	local_irq_restore(flags);
   1761}
   1762
   1763static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
   1764{
   1765	return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
   1766}
   1767
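        /*
         * Rescale the remaining time of a running timer after the divide count
         * (APIC_TDCR) has changed, keeping target_expiration and tscdeadline
         * consistent with the new divisor.
         */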
   1768static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
   1769{
   1770	ktime_t now, remaining;
   1771	u64 ns_remaining_old, ns_remaining_new;
   1772
   1773	apic->lapic_timer.period =
   1774			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
   1775	limit_periodic_timer_frequency(apic);
   1776
   1777	now = ktime_get();
   1778	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
   1779	if (ktime_to_ns(remaining) < 0)
   1780		remaining = 0;
   1781
   1782	ns_remaining_old = ktime_to_ns(remaining);
   1783	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
   1784	                                   apic->divide_count, old_divisor);
   1785
   1786	apic->lapic_timer.tscdeadline +=
   1787		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
   1788		nsec_to_cycles(apic->vcpu, ns_remaining_old);
   1789	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
   1790}
   1791
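        /*
         * Compute the period and the absolute deadlines (ktime and guest TSC)
         * for a one-shot or periodic timer from the given count register:
         * APIC_TMICT for a normal start, or APIC_TMCCT when restoring saved
         * APIC state.
         */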
   1792static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
   1793{
   1794	ktime_t now;
   1795	u64 tscl = rdtsc();
   1796	s64 deadline;
   1797
   1798	now = ktime_get();
   1799	apic->lapic_timer.period =
   1800			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
   1801
   1802	if (!apic->lapic_timer.period) {
   1803		apic->lapic_timer.tscdeadline = 0;
   1804		return false;
   1805	}
   1806
   1807	limit_periodic_timer_frequency(apic);
   1808	deadline = apic->lapic_timer.period;
   1809
   1810	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
   1811		if (unlikely(count_reg != APIC_TMICT)) {
   1812			deadline = tmict_to_ns(apic,
   1813				     kvm_lapic_get_reg(apic, count_reg));
   1814			if (unlikely(deadline <= 0))
   1815				deadline = apic->lapic_timer.period;
   1816			else if (unlikely(deadline > apic->lapic_timer.period)) {
   1817				pr_info_ratelimited(
   1818				    "kvm: vcpu %i: requested lapic timer restore with "
   1819				    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
   1820				    "Using initial count to start timer.\n",
   1821				    apic->vcpu->vcpu_id,
   1822				    count_reg,
   1823				    kvm_lapic_get_reg(apic, count_reg),
   1824				    deadline, apic->lapic_timer.period);
   1825				kvm_lapic_set_reg(apic, count_reg, 0);
   1826				deadline = apic->lapic_timer.period;
   1827			}
   1828		}
   1829	}
   1830
   1831	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
   1832		nsec_to_cycles(apic->vcpu, deadline);
   1833	apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
   1834
   1835	return true;
   1836}
   1837
   1838static void advance_periodic_target_expiration(struct kvm_lapic *apic)
   1839{
   1840	ktime_t now = ktime_get();
   1841	u64 tscl = rdtsc();
   1842	ktime_t delta;
   1843
   1844	/*
   1845	 * Synchronize both deadlines to the same time source or
   1846	 * differences in the periods (caused by differences in the
   1847	 * underlying clocks or numerical approximation errors) will
   1848	 * cause the two to drift apart over time as the errors
   1849	 * accumulate.
   1850	 */
   1851	apic->lapic_timer.target_expiration =
   1852		ktime_add_ns(apic->lapic_timer.target_expiration,
   1853				apic->lapic_timer.period);
   1854	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
   1855	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
   1856		nsec_to_cycles(apic->vcpu, delta);
   1857}
   1858
   1859static void start_sw_period(struct kvm_lapic *apic)
   1860{
   1861	if (!apic->lapic_timer.period)
   1862		return;
   1863
   1864	if (ktime_after(ktime_get(),
   1865			apic->lapic_timer.target_expiration)) {
   1866		apic_timer_expired(apic, false);
   1867
   1868		if (apic_lvtt_oneshot(apic))
   1869			return;
   1870
   1871		advance_periodic_target_expiration(apic);
   1872	}
   1873
   1874	hrtimer_start(&apic->lapic_timer.timer,
   1875		apic->lapic_timer.target_expiration,
   1876		HRTIMER_MODE_ABS_HARD);
   1877}
   1878
   1879bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
   1880{
   1881	if (!lapic_in_kernel(vcpu))
   1882		return false;
   1883
   1884	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
   1885}
   1886EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
   1887
   1888static void cancel_hv_timer(struct kvm_lapic *apic)
   1889{
   1890	WARN_ON(preemptible());
   1891	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
   1892	static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
   1893	apic->lapic_timer.hv_timer_in_use = false;
   1894}
   1895
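        /*
         * Try to program the hardware-accelerated ("hv") timer through the
         * vendor hook.  Returns false if the hv timer cannot be used or no
         * deadline is set, in which case the caller falls back to the
         * hrtimer-based software timer.
         */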
   1896static bool start_hv_timer(struct kvm_lapic *apic)
   1897{
   1898	struct kvm_timer *ktimer = &apic->lapic_timer;
   1899	struct kvm_vcpu *vcpu = apic->vcpu;
   1900	bool expired;
   1901
   1902	WARN_ON(preemptible());
   1903	if (!kvm_can_use_hv_timer(vcpu))
   1904		return false;
   1905
   1906	if (!ktimer->tscdeadline)
   1907		return false;
   1908
   1909	if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
   1910		return false;
   1911
   1912	ktimer->hv_timer_in_use = true;
   1913	hrtimer_cancel(&ktimer->timer);
   1914
   1915	/*
   1916	 * To simplify handling the periodic timer, leave the hv timer running
   1917	 * even if the deadline timer has expired, i.e. rely on the resulting
   1918	 * VM-Exit to recompute the periodic timer's target expiration.
   1919	 */
   1920	if (!apic_lvtt_period(apic)) {
   1921		/*
   1922		 * Cancel the hv timer if the sw timer fired while the hv timer
   1923		 * was being programmed, or if the hv timer itself expired.
   1924		 */
   1925		if (atomic_read(&ktimer->pending)) {
   1926			cancel_hv_timer(apic);
   1927		} else if (expired) {
   1928			apic_timer_expired(apic, false);
   1929			cancel_hv_timer(apic);
   1930		}
   1931	}
   1932
   1933	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
   1934
   1935	return true;
   1936}
   1937
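        /*
         * Switch to the hrtimer-based timer: cancel a running hv timer and arm
         * the software timer appropriate for the current LVTT mode.
         */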
   1938static void start_sw_timer(struct kvm_lapic *apic)
   1939{
   1940	struct kvm_timer *ktimer = &apic->lapic_timer;
   1941
   1942	WARN_ON(preemptible());
   1943	if (apic->lapic_timer.hv_timer_in_use)
   1944		cancel_hv_timer(apic);
   1945	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
   1946		return;
   1947
   1948	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
   1949		start_sw_period(apic);
   1950	else if (apic_lvtt_tscdeadline(apic))
   1951		start_sw_tscdeadline(apic);
   1952	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
   1953}
   1954
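        /* Re-arm the timer, preferring the hv timer and falling back to the sw timer. */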
   1955static void restart_apic_timer(struct kvm_lapic *apic)
   1956{
   1957	preempt_disable();
   1958
   1959	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
   1960		goto out;
   1961
   1962	if (!start_hv_timer(apic))
   1963		start_sw_timer(apic);
   1964out:
   1965	preempt_enable();
   1966}
   1967
   1968void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
   1969{
   1970	struct kvm_lapic *apic = vcpu->arch.apic;
   1971
   1972	preempt_disable();
   1973	/* If the preempt notifier has already run, it also called apic_timer_expired */
   1974	if (!apic->lapic_timer.hv_timer_in_use)
   1975		goto out;
   1976	WARN_ON(kvm_vcpu_is_blocking(vcpu));
   1977	apic_timer_expired(apic, false);
   1978	cancel_hv_timer(apic);
   1979
   1980	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
   1981		advance_periodic_target_expiration(apic);
   1982		restart_apic_timer(apic);
   1983	}
   1984out:
   1985	preempt_enable();
   1986}
   1987EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
   1988
   1989void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
   1990{
   1991	restart_apic_timer(vcpu->arch.apic);
   1992}
   1993
   1994void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
   1995{
   1996	struct kvm_lapic *apic = vcpu->arch.apic;
   1997
   1998	preempt_disable();
   1999	/* Possibly the TSC deadline timer is not enabled yet */
   2000	if (apic->lapic_timer.hv_timer_in_use)
   2001		start_sw_timer(apic);
   2002	preempt_enable();
   2003}
   2004
   2005void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
   2006{
   2007	struct kvm_lapic *apic = vcpu->arch.apic;
   2008
   2009	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
   2010	restart_apic_timer(apic);
   2011}
   2012
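        /*
         * (Re)start the APIC timer from the given count register: compute new
         * deadlines for one-shot/periodic modes, then arm the hv or sw timer.
         */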
   2013static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
   2014{
   2015	atomic_set(&apic->lapic_timer.pending, 0);
   2016
   2017	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
   2018	    && !set_target_expiration(apic, count_reg))
   2019		return;
   2020
   2021	restart_apic_timer(apic);
   2022}
   2023
   2024static void start_apic_timer(struct kvm_lapic *apic)
   2025{
   2026	__start_apic_timer(apic, APIC_TMICT);
   2027}
   2028
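        /*
         * Keep the per-VM vapics_in_nmi_mode count in sync with whether this
         * vAPIC's LVT0 is programmed for NMI delivery.
         */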
   2029static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
   2030{
   2031	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
   2032
   2033	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
   2034		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
    2035		if (lvt0_in_nmi_mode)
    2036			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
    2037		else
    2038			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
   2039	}
   2040}
   2041
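        /*
         * If the guest has changed its xAPIC ID to something other than
         * vcpu_id, inhibit APICv for the VM, since hardware acceleration
         * generally assumes the default ID.
         */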
   2042static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
   2043{
   2044	struct kvm *kvm = apic->vcpu->kvm;
   2045
   2046	if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
   2047		return;
   2048
   2049	if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
   2050		return;
   2051
   2052	kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
   2053}
   2054
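        /*
         * Emulate a 32-bit write to a local APIC register.  Returns 1 for
         * writes that are reserved or invalid in the current (xAPIC vs.
         * x2APIC) mode.
         */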
   2055static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
   2056{
   2057	int ret = 0;
   2058
   2059	trace_kvm_apic_write(reg, val);
   2060
   2061	switch (reg) {
   2062	case APIC_ID:		/* Local APIC ID */
   2063		if (!apic_x2apic_mode(apic)) {
   2064			kvm_apic_set_xapic_id(apic, val >> 24);
   2065			kvm_lapic_xapic_id_updated(apic);
   2066		} else {
   2067			ret = 1;
   2068		}
   2069		break;
   2070
   2071	case APIC_TASKPRI:
   2072		report_tpr_access(apic, true);
   2073		apic_set_tpr(apic, val & 0xff);
   2074		break;
   2075
   2076	case APIC_EOI:
   2077		apic_set_eoi(apic);
   2078		break;
   2079
   2080	case APIC_LDR:
   2081		if (!apic_x2apic_mode(apic))
   2082			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
   2083		else
   2084			ret = 1;
   2085		break;
   2086
   2087	case APIC_DFR:
   2088		if (!apic_x2apic_mode(apic))
   2089			kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
   2090		else
   2091			ret = 1;
   2092		break;
   2093
   2094	case APIC_SPIV: {
   2095		u32 mask = 0x3ff;
   2096		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
   2097			mask |= APIC_SPIV_DIRECTED_EOI;
   2098		apic_set_spiv(apic, val & mask);
   2099		if (!(val & APIC_SPIV_APIC_ENABLED)) {
   2100			int i;
   2101			u32 lvt_val;
   2102
   2103			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
   2104				lvt_val = kvm_lapic_get_reg(apic,
   2105						       APIC_LVTT + 0x10 * i);
   2106				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
   2107					     lvt_val | APIC_LVT_MASKED);
   2108			}
   2109			apic_update_lvtt(apic);
   2110			atomic_set(&apic->lapic_timer.pending, 0);
   2111
   2112		}
   2113		break;
   2114	}
   2115	case APIC_ICR:
   2116		WARN_ON_ONCE(apic_x2apic_mode(apic));
   2117
   2118		/* No delay here, so we always clear the pending bit */
   2119		val &= ~APIC_ICR_BUSY;
   2120		kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
   2121		kvm_lapic_set_reg(apic, APIC_ICR, val);
   2122		break;
   2123	case APIC_ICR2:
   2124		if (apic_x2apic_mode(apic))
   2125			ret = 1;
   2126		else
   2127			kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
   2128		break;
   2129
   2130	case APIC_LVT0:
   2131		apic_manage_nmi_watchdog(apic, val);
   2132		fallthrough;
   2133	case APIC_LVTTHMR:
   2134	case APIC_LVTPC:
   2135	case APIC_LVT1:
   2136	case APIC_LVTERR: {
   2137		/* TODO: Check vector */
   2138		size_t size;
   2139		u32 index;
   2140
   2141		if (!kvm_apic_sw_enabled(apic))
   2142			val |= APIC_LVT_MASKED;
   2143		size = ARRAY_SIZE(apic_lvt_mask);
   2144		index = array_index_nospec(
   2145				(reg - APIC_LVTT) >> 4, size);
   2146		val &= apic_lvt_mask[index];
   2147		kvm_lapic_set_reg(apic, reg, val);
   2148		break;
   2149	}
   2150
   2151	case APIC_LVTT:
   2152		if (!kvm_apic_sw_enabled(apic))
   2153			val |= APIC_LVT_MASKED;
   2154		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
   2155		kvm_lapic_set_reg(apic, APIC_LVTT, val);
   2156		apic_update_lvtt(apic);
   2157		break;
   2158
   2159	case APIC_TMICT:
   2160		if (apic_lvtt_tscdeadline(apic))
   2161			break;
   2162
   2163		cancel_apic_timer(apic);
   2164		kvm_lapic_set_reg(apic, APIC_TMICT, val);
   2165		start_apic_timer(apic);
   2166		break;
   2167
   2168	case APIC_TDCR: {
   2169		uint32_t old_divisor = apic->divide_count;
   2170
   2171		kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
   2172		update_divide_count(apic);
   2173		if (apic->divide_count != old_divisor &&
   2174				apic->lapic_timer.period) {
   2175			hrtimer_cancel(&apic->lapic_timer.timer);
   2176			update_target_expiration(apic, old_divisor);
   2177			restart_apic_timer(apic);
   2178		}
   2179		break;
   2180	}
   2181	case APIC_ESR:
   2182		if (apic_x2apic_mode(apic) && val != 0)
   2183			ret = 1;
   2184		break;
   2185
   2186	case APIC_SELF_IPI:
   2187		if (apic_x2apic_mode(apic))
   2188			kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
   2189		else
   2190			ret = 1;
   2191		break;
   2192	default:
   2193		ret = 1;
   2194		break;
   2195	}
   2196
   2197	/*
   2198	 * Recalculate APIC maps if necessary, e.g. if the software enable bit
   2199	 * was toggled, the APIC ID changed, etc...   The maps are marked dirty
   2200	 * on relevant changes, i.e. this is a nop for most writes.
   2201	 */
   2202	kvm_recalculate_apic_map(apic->vcpu->kvm);
   2203
   2204	return ret;
   2205}
   2206
   2207static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
   2208			    gpa_t address, int len, const void *data)
   2209{
   2210	struct kvm_lapic *apic = to_lapic(this);
   2211	unsigned int offset = address - apic->base_address;
   2212	u32 val;
   2213
   2214	if (!apic_mmio_in_range(apic, address))
   2215		return -EOPNOTSUPP;
   2216
   2217	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
   2218		if (!kvm_check_has_quirk(vcpu->kvm,
   2219					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
   2220			return -EOPNOTSUPP;
   2221
   2222		return 0;
   2223	}
   2224
   2225	/*
    2226	 * APIC registers must be aligned on a 128-bit boundary.
    2227	 * 32/64/128-bit registers must be accessed through 32-bit reads and writes.
    2228	 * Refer to SDM 8.4.1.
   2229	 */
   2230	if (len != 4 || (offset & 0xf))
   2231		return 0;
   2232
   2233	val = *(u32*)data;
   2234
   2235	kvm_lapic_reg_write(apic, offset & 0xff0, val);
   2236
   2237	return 0;
   2238}
   2239
   2240void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
   2241{
   2242	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
   2243}
   2244EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
   2245
   2246/* emulate APIC access in a trap manner */
   2247void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
   2248{
   2249	u32 val = kvm_lapic_get_reg(vcpu->arch.apic, offset);
   2250
   2251	/* TODO: optimize to just emulate side effect w/o one more write */
   2252	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
   2253}
   2254EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
   2255
   2256void kvm_free_lapic(struct kvm_vcpu *vcpu)
   2257{
   2258	struct kvm_lapic *apic = vcpu->arch.apic;
   2259
   2260	if (!vcpu->arch.apic)
   2261		return;
   2262
   2263	hrtimer_cancel(&apic->lapic_timer.timer);
   2264
   2265	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
   2266		static_branch_slow_dec_deferred(&apic_hw_disabled);
   2267
   2268	if (!apic->sw_enabled)
   2269		static_branch_slow_dec_deferred(&apic_sw_disabled);
   2270
   2271	if (apic->regs)
   2272		free_page((unsigned long)apic->regs);
   2273
   2274	kfree(apic);
   2275}
   2276
   2277/*
   2278 *----------------------------------------------------------------------
   2279 * LAPIC interface
   2280 *----------------------------------------------------------------------
   2281 */
   2282u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
   2283{
   2284	struct kvm_lapic *apic = vcpu->arch.apic;
   2285
   2286	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
   2287		return 0;
   2288
   2289	return apic->lapic_timer.tscdeadline;
   2290}
   2291
   2292void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
   2293{
   2294	struct kvm_lapic *apic = vcpu->arch.apic;
   2295
   2296	if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
   2297		return;
   2298
   2299	hrtimer_cancel(&apic->lapic_timer.timer);
   2300	apic->lapic_timer.tscdeadline = data;
   2301	start_apic_timer(apic);
   2302}
   2303
   2304void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
   2305{
   2306	apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
   2307}
   2308
   2309u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
   2310{
   2311	u64 tpr;
   2312
   2313	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
   2314
   2315	return (tpr & 0xf0) >> 4;
   2316}
   2317
   2318void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
   2319{
   2320	u64 old_value = vcpu->arch.apic_base;
   2321	struct kvm_lapic *apic = vcpu->arch.apic;
   2322
   2323	vcpu->arch.apic_base = value;
   2324
   2325	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
   2326		kvm_update_cpuid_runtime(vcpu);
   2327
   2328	if (!apic)
   2329		return;
   2330
   2331	/* update jump label if enable bit changes */
   2332	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
   2333		if (value & MSR_IA32_APICBASE_ENABLE) {
   2334			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
   2335			static_branch_slow_dec_deferred(&apic_hw_disabled);
   2336			/* Check if there are APF page ready requests pending */
   2337			kvm_make_request(KVM_REQ_APF_READY, vcpu);
   2338		} else {
   2339			static_branch_inc(&apic_hw_disabled.key);
   2340			atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
   2341		}
   2342	}
   2343
   2344	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
   2345		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
   2346
   2347	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
   2348		static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
   2349
   2350	apic->base_address = apic->vcpu->arch.apic_base &
   2351			     MSR_IA32_APICBASE_BASE;
   2352
   2353	if ((value & MSR_IA32_APICBASE_ENABLE) &&
   2354	     apic->base_address != APIC_DEFAULT_PHYS_BASE) {
   2355		kvm_set_apicv_inhibit(apic->vcpu->kvm,
   2356				      APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
   2357	}
   2358}
   2359
   2360void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
   2361{
   2362	struct kvm_lapic *apic = vcpu->arch.apic;
   2363
   2364	if (vcpu->arch.apicv_active) {
   2365		/* irr_pending is always true when apicv is activated. */
   2366		apic->irr_pending = true;
   2367		apic->isr_count = 1;
   2368	} else {
   2369		/*
   2370		 * Don't clear irr_pending, searching the IRR can race with
   2371		 * updates from the CPU as APICv is still active from hardware's
   2372		 * perspective.  The flag will be cleared as appropriate when
   2373		 * KVM injects the interrupt.
   2374		 */
   2375		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
   2376	}
   2377}
   2378EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
   2379
   2380void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
   2381{
   2382	struct kvm_lapic *apic = vcpu->arch.apic;
   2383	u64 msr_val;
   2384	int i;
   2385
   2386	if (!init_event) {
   2387		msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
   2388		if (kvm_vcpu_is_reset_bsp(vcpu))
   2389			msr_val |= MSR_IA32_APICBASE_BSP;
   2390		kvm_lapic_set_base(vcpu, msr_val);
   2391	}
   2392
   2393	if (!apic)
   2394		return;
   2395
   2396	/* Stop the timer in case it's a reset to an active apic */
   2397	hrtimer_cancel(&apic->lapic_timer.timer);
   2398
   2399	/* The xAPIC ID is set at RESET even if the APIC was already enabled. */
   2400	if (!init_event)
   2401		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
   2402	kvm_apic_set_version(apic->vcpu);
   2403
   2404	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
   2405		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
   2406	apic_update_lvtt(apic);
   2407	if (kvm_vcpu_is_reset_bsp(vcpu) &&
   2408	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
   2409		kvm_lapic_set_reg(apic, APIC_LVT0,
   2410			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
   2411	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
   2412
   2413	kvm_apic_set_dfr(apic, 0xffffffffU);
   2414	apic_set_spiv(apic, 0xff);
   2415	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
   2416	if (!apic_x2apic_mode(apic))
   2417		kvm_apic_set_ldr(apic, 0);
   2418	kvm_lapic_set_reg(apic, APIC_ESR, 0);
   2419	if (!apic_x2apic_mode(apic)) {
   2420		kvm_lapic_set_reg(apic, APIC_ICR, 0);
   2421		kvm_lapic_set_reg(apic, APIC_ICR2, 0);
   2422	} else {
   2423		kvm_lapic_set_reg64(apic, APIC_ICR, 0);
   2424	}
   2425	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
   2426	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
   2427	for (i = 0; i < 8; i++) {
   2428		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
   2429		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
   2430		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
   2431	}
   2432	kvm_apic_update_apicv(vcpu);
   2433	apic->highest_isr_cache = -1;
   2434	update_divide_count(apic);
   2435	atomic_set(&apic->lapic_timer.pending, 0);
   2436
   2437	vcpu->arch.pv_eoi.msr_val = 0;
   2438	apic_update_ppr(apic);
   2439	if (vcpu->arch.apicv_active) {
   2440		static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
   2441		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
   2442		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, -1);
   2443	}
   2444
   2445	vcpu->arch.apic_arb_prio = 0;
   2446	vcpu->arch.apic_attention = 0;
   2447
   2448	kvm_recalculate_apic_map(vcpu->kvm);
   2449}
   2450
   2451/*
   2452 *----------------------------------------------------------------------
   2453 * timer interface
   2454 *----------------------------------------------------------------------
   2455 */
   2456
   2457static bool lapic_is_periodic(struct kvm_lapic *apic)
   2458{
   2459	return apic_lvtt_period(apic);
   2460}
   2461
   2462int apic_has_pending_timer(struct kvm_vcpu *vcpu)
   2463{
   2464	struct kvm_lapic *apic = vcpu->arch.apic;
   2465
   2466	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
   2467		return atomic_read(&apic->lapic_timer.pending);
   2468
   2469	return 0;
   2470}
   2471
   2472int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
   2473{
   2474	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
   2475	int vector, mode, trig_mode;
   2476
   2477	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
   2478		vector = reg & APIC_VECTOR_MASK;
   2479		mode = reg & APIC_MODE_MASK;
   2480		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
   2481		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
   2482					NULL);
   2483	}
   2484	return 0;
   2485}
   2486
   2487void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
   2488{
   2489	struct kvm_lapic *apic = vcpu->arch.apic;
   2490
   2491	if (apic)
   2492		kvm_apic_local_deliver(apic, APIC_LVT0);
   2493}
   2494
   2495static const struct kvm_io_device_ops apic_mmio_ops = {
   2496	.read     = apic_mmio_read,
   2497	.write    = apic_mmio_write,
   2498};
   2499
   2500static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
   2501{
   2502	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
   2503	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
   2504
   2505	apic_timer_expired(apic, true);
   2506
   2507	if (lapic_is_periodic(apic)) {
   2508		advance_periodic_target_expiration(apic);
   2509		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
   2510		return HRTIMER_RESTART;
    2511	}
    2512	return HRTIMER_NORESTART;
   2513}
   2514
   2515int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
   2516{
   2517	struct kvm_lapic *apic;
   2518
   2519	ASSERT(vcpu != NULL);
   2520
   2521	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
   2522	if (!apic)
   2523		goto nomem;
   2524
   2525	vcpu->arch.apic = apic;
   2526
   2527	if (kvm_x86_ops.alloc_apic_backing_page)
   2528		apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
   2529	else
   2530		apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
   2531	if (!apic->regs) {
   2532		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
   2533		       vcpu->vcpu_id);
   2534		goto nomem_free_apic;
   2535	}
   2536	apic->vcpu = vcpu;
   2537
   2538	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
   2539		     HRTIMER_MODE_ABS_HARD);
   2540	apic->lapic_timer.timer.function = apic_timer_fn;
   2541	if (timer_advance_ns == -1) {
   2542		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
   2543		lapic_timer_advance_dynamic = true;
   2544	} else {
   2545		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
   2546		lapic_timer_advance_dynamic = false;
   2547	}
   2548
   2549	/*
   2550	 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
   2551	 * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
   2552	 */
   2553	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
   2554	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
   2555	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
   2556
   2557	return 0;
   2558nomem_free_apic:
   2559	kfree(apic);
   2560	vcpu->arch.apic = NULL;
   2561nomem:
   2562	return -ENOMEM;
   2563}
   2564
   2565int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
   2566{
   2567	struct kvm_lapic *apic = vcpu->arch.apic;
   2568	u32 ppr;
   2569
   2570	if (!kvm_apic_present(vcpu))
   2571		return -1;
   2572
   2573	__apic_update_ppr(apic, &ppr);
   2574	return apic_has_interrupt_for_ppr(apic, ppr);
   2575}
   2576EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
   2577
   2578int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
   2579{
   2580	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
   2581
   2582	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
   2583		return 1;
   2584	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
   2585	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
   2586		return 1;
   2587	return 0;
   2588}
   2589
   2590void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
   2591{
   2592	struct kvm_lapic *apic = vcpu->arch.apic;
   2593
   2594	if (atomic_read(&apic->lapic_timer.pending) > 0) {
   2595		kvm_apic_inject_pending_timer_irqs(apic);
   2596		atomic_set(&apic->lapic_timer.pending, 0);
   2597	}
   2598}
   2599
   2600int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
   2601{
   2602	int vector = kvm_apic_has_interrupt(vcpu);
   2603	struct kvm_lapic *apic = vcpu->arch.apic;
   2604	u32 ppr;
   2605
   2606	if (vector == -1)
   2607		return -1;
   2608
   2609	/*
   2610	 * We get here even with APIC virtualization enabled, if doing
   2611	 * nested virtualization and L1 runs with the "acknowledge interrupt
   2612	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
   2613	 * because the process would deliver it through the IDT.
   2614	 */
   2615
   2616	apic_clear_irr(vector, apic);
   2617	if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
   2618		/*
   2619		 * For auto-EOI interrupts, there might be another pending
   2620		 * interrupt above PPR, so check whether to raise another
   2621		 * KVM_REQ_EVENT.
   2622		 */
   2623		apic_update_ppr(apic);
   2624	} else {
   2625		/*
   2626		 * For normal interrupts, PPR has been raised and there cannot
   2627		 * be a higher-priority pending interrupt---except if there was
   2628		 * a concurrent interrupt injection, but that would have
   2629		 * triggered KVM_REQ_EVENT already.
   2630		 */
   2631		apic_set_isr(vector, apic);
   2632		__apic_update_ppr(apic, &ppr);
   2633	}
   2634
   2635	return vector;
   2636}
   2637
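        /*
         * Translate between the userspace KVM_{GET,SET}_LAPIC register layout
         * and the in-kernel layout, which differ in x2APIC mode (APIC ID
         * format and a split ICR/ICR2 vs. a single 64-bit ICR).
         */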
   2638static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
   2639		struct kvm_lapic_state *s, bool set)
   2640{
   2641	if (apic_x2apic_mode(vcpu->arch.apic)) {
   2642		u32 *id = (u32 *)(s->regs + APIC_ID);
   2643		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
   2644		u64 icr;
   2645
   2646		if (vcpu->kvm->arch.x2apic_format) {
   2647			if (*id != vcpu->vcpu_id)
   2648				return -EINVAL;
   2649		} else {
   2650			if (set)
   2651				*id >>= 24;
   2652			else
   2653				*id <<= 24;
   2654		}
   2655
   2656		/*
   2657		 * In x2APIC mode, the LDR is fixed and based on the id.  And
   2658		 * ICR is internally a single 64-bit register, but needs to be
   2659		 * split to ICR+ICR2 in userspace for backwards compatibility.
   2660		 */
   2661		if (set) {
   2662			*ldr = kvm_apic_calc_x2apic_ldr(*id);
   2663
   2664			icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
   2665			      (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
   2666			__kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
   2667		} else {
   2668			icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
   2669			__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
   2670		}
   2671	} else {
   2672		kvm_lapic_xapic_id_updated(vcpu->arch.apic);
   2673	}
   2674
   2675	return 0;
   2676}
   2677
   2678int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
   2679{
   2680	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
   2681
   2682	/*
    2683	 * Get the calculated current timer count for the remaining timer period
    2684	 * (if any) and store it in the returned register set.
   2685	 */
   2686	__kvm_lapic_set_reg(s->regs, APIC_TMCCT,
   2687			    __apic_read(vcpu->arch.apic, APIC_TMCCT));
   2688
   2689	return kvm_apic_state_fixup(vcpu, s, false);
   2690}
   2691
   2692int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
   2693{
   2694	struct kvm_lapic *apic = vcpu->arch.apic;
   2695	int r;
   2696
   2697	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
   2698	/* set SPIV separately to get count of SW disabled APICs right */
   2699	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
   2700
   2701	r = kvm_apic_state_fixup(vcpu, s, true);
   2702	if (r) {
   2703		kvm_recalculate_apic_map(vcpu->kvm);
   2704		return r;
   2705	}
   2706	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
   2707
   2708	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
   2709	kvm_recalculate_apic_map(vcpu->kvm);
   2710	kvm_apic_set_version(vcpu);
   2711
   2712	apic_update_ppr(apic);
   2713	cancel_apic_timer(apic);
   2714	apic->lapic_timer.expired_tscdeadline = 0;
   2715	apic_update_lvtt(apic);
   2716	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
   2717	update_divide_count(apic);
   2718	__start_apic_timer(apic, APIC_TMCCT);
   2719	kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
   2720	kvm_apic_update_apicv(vcpu);
   2721	apic->highest_isr_cache = -1;
   2722	if (vcpu->arch.apicv_active) {
   2723		static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
   2724		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
   2725		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
   2726	}
   2727	kvm_make_request(KVM_REQ_EVENT, vcpu);
   2728	if (ioapic_in_kernel(vcpu->kvm))
   2729		kvm_rtc_eoi_tracking_restore_one(vcpu);
   2730
   2731	vcpu->arch.apic_arb_prio = 0;
   2732
   2733	return 0;
   2734}
   2735
   2736void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
   2737{
   2738	struct hrtimer *timer;
   2739
   2740	if (!lapic_in_kernel(vcpu) ||
   2741		kvm_can_post_timer_interrupt(vcpu))
   2742		return;
   2743
   2744	timer = &vcpu->arch.apic->lapic_timer.timer;
   2745	if (hrtimer_cancel(timer))
   2746		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
   2747}
   2748
   2749/*
   2750 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
   2751 *
    2752 * Detect whether the guest triggered PV EOI since the
    2753 * last entry. If yes, set EOI on the guest's behalf.
   2754 * Clear PV EOI in guest memory in any case.
   2755 */
   2756static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
   2757					struct kvm_lapic *apic)
   2758{
   2759	int vector;
   2760	/*
   2761	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
   2762	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
   2763	 *
   2764	 * KVM_APIC_PV_EOI_PENDING is unset:
   2765	 * 	-> host disabled PV EOI.
   2766	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
   2767	 * 	-> host enabled PV EOI, guest did not execute EOI yet.
   2768	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
   2769	 * 	-> host enabled PV EOI, guest executed EOI.
   2770	 */
   2771	BUG_ON(!pv_eoi_enabled(vcpu));
   2772
   2773	if (pv_eoi_test_and_clr_pending(vcpu))
   2774		return;
   2775	vector = apic_set_eoi(apic);
   2776	trace_kvm_pv_eoi(apic, vector);
   2777}
   2778
   2779void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
   2780{
   2781	u32 data;
   2782
   2783	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
   2784		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
   2785
   2786	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
   2787		return;
   2788
   2789	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
   2790				  sizeof(u32)))
   2791		return;
   2792
   2793	apic_set_tpr(vcpu->arch.apic, data & 0xff);
   2794}
   2795
   2796/*
   2797 * apic_sync_pv_eoi_to_guest - called before vmentry
   2798 *
    2799 * Detect whether it is safe to enable PV EOI and,
    2800 * if so, do it.
   2801 */
   2802static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
   2803					struct kvm_lapic *apic)
   2804{
   2805	if (!pv_eoi_enabled(vcpu) ||
   2806	    /* IRR set or many bits in ISR: could be nested. */
   2807	    apic->irr_pending ||
   2808	    /* Cache not set: could be safe but we don't bother. */
   2809	    apic->highest_isr_cache == -1 ||
   2810	    /* Need EOI to update ioapic. */
   2811	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
   2812		/*
   2813		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
   2814		 * so we need not do anything here.
   2815		 */
   2816		return;
   2817	}
   2818
   2819	pv_eoi_set_pending(apic->vcpu);
   2820}
   2821
   2822void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
   2823{
   2824	u32 data, tpr;
   2825	int max_irr, max_isr;
   2826	struct kvm_lapic *apic = vcpu->arch.apic;
   2827
   2828	apic_sync_pv_eoi_to_guest(vcpu, apic);
   2829
   2830	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
   2831		return;
   2832
   2833	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
   2834	max_irr = apic_find_highest_irr(apic);
   2835	if (max_irr < 0)
   2836		max_irr = 0;
   2837	max_isr = apic_find_highest_isr(apic);
   2838	if (max_isr < 0)
   2839		max_isr = 0;
   2840	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
   2841
   2842	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
   2843				sizeof(u32));
   2844}
   2845
   2846int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
   2847{
   2848	if (vapic_addr) {
   2849		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
   2850					&vcpu->arch.apic->vapic_cache,
   2851					vapic_addr, sizeof(u32)))
   2852			return -EINVAL;
   2853		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
   2854	} else {
   2855		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
   2856	}
   2857
   2858	vcpu->arch.apic->vapic_addr = vapic_addr;
   2859	return 0;
   2860}
   2861
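        /*
         * Emulate a 64-bit write to the x2APIC ICR: clear the busy bit, send
         * the IPI, and latch the value.
         */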
   2862int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
   2863{
   2864	data &= ~APIC_ICR_BUSY;
   2865
   2866	kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
   2867	kvm_lapic_set_reg64(apic, APIC_ICR, data);
   2868	trace_kvm_apic_write(APIC_ICR, data);
   2869	return 0;
   2870}
   2871
   2872static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
   2873{
   2874	u32 low;
   2875
   2876	if (reg == APIC_ICR) {
   2877		*data = kvm_lapic_get_reg64(apic, APIC_ICR);
   2878		return 0;
   2879	}
   2880
   2881	if (kvm_lapic_reg_read(apic, reg, 4, &low))
   2882		return 1;
   2883
   2884	*data = low;
   2885
   2886	return 0;
   2887}
   2888
   2889static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
   2890{
   2891	/*
    2892	 * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
   2893	 * can be written as such, all other registers remain accessible only
   2894	 * through 32-bit reads/writes.
   2895	 */
   2896	if (reg == APIC_ICR)
   2897		return kvm_x2apic_icr_write(apic, data);
   2898
   2899	return kvm_lapic_reg_write(apic, reg, (u32)data);
   2900}
   2901
   2902int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
   2903{
   2904	struct kvm_lapic *apic = vcpu->arch.apic;
   2905	u32 reg = (msr - APIC_BASE_MSR) << 4;
   2906
   2907	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
   2908		return 1;
   2909
   2910	return kvm_lapic_msr_write(apic, reg, data);
   2911}
   2912
   2913int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
   2914{
   2915	struct kvm_lapic *apic = vcpu->arch.apic;
   2916	u32 reg = (msr - APIC_BASE_MSR) << 4;
   2917
   2918	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
   2919		return 1;
   2920
   2921	if (reg == APIC_DFR)
   2922		return 1;
   2923
   2924	return kvm_lapic_msr_read(apic, reg, data);
   2925}
   2926
   2927int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
   2928{
   2929	if (!lapic_in_kernel(vcpu))
   2930		return 1;
   2931
   2932	return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
   2933}
   2934
   2935int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
   2936{
   2937	if (!lapic_in_kernel(vcpu))
   2938		return 1;
   2939
   2940	return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
   2941}
   2942
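        /*
         * Handle a guest write to the PV EOI MSR: check alignment,
         * (re)initialize the guest-memory cache when enabling, and record the
         * new MSR value.
         */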
   2943int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
   2944{
   2945	u64 addr = data & ~KVM_MSR_ENABLED;
   2946	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
   2947	unsigned long new_len;
   2948	int ret;
   2949
   2950	if (!IS_ALIGNED(addr, 4))
   2951		return 1;
   2952
   2953	if (data & KVM_MSR_ENABLED) {
   2954		if (addr == ghc->gpa && len <= ghc->len)
   2955			new_len = ghc->len;
   2956		else
   2957			new_len = len;
   2958
   2959		ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
   2960		if (ret)
   2961			return ret;
   2962	}
   2963
   2964	vcpu->arch.pv_eoi.msr_val = data;
   2965
   2966	return 0;
   2967}
   2968
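        /*
         * Process pending INIT and SIPI events for the vCPU, honoring states
         * in which INITs are latched (see below) and updating mp_state
         * accordingly.
         */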
   2969int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
   2970{
   2971	struct kvm_lapic *apic = vcpu->arch.apic;
   2972	u8 sipi_vector;
   2973	int r;
   2974	unsigned long pe;
   2975
   2976	if (!lapic_in_kernel(vcpu))
   2977		return 0;
   2978
   2979	/*
   2980	 * Read pending events before calling the check_events
   2981	 * callback.
   2982	 */
   2983	pe = smp_load_acquire(&apic->pending_events);
   2984	if (!pe)
   2985		return 0;
   2986
   2987	if (is_guest_mode(vcpu)) {
   2988		r = kvm_check_nested_events(vcpu);
   2989		if (r < 0)
   2990			return r == -EBUSY ? 0 : r;
   2991		/*
   2992		 * If an event has happened and caused a vmexit,
   2993		 * we know INITs are latched and therefore
   2994		 * we will not incorrectly deliver an APIC
   2995		 * event instead of a vmexit.
   2996		 */
   2997	}
   2998
   2999	/*
   3000	 * INITs are latched while CPU is in specific states
   3001	 * (SMM, VMX root mode, SVM with GIF=0).
   3002	 * Because a CPU cannot be in these states immediately
   3003	 * after it has processed an INIT signal (and thus in
   3004	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
   3005	 * and leave the INIT pending.
   3006	 */
   3007	if (kvm_vcpu_latch_init(vcpu)) {
   3008		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
   3009		if (test_bit(KVM_APIC_SIPI, &pe))
   3010			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
   3011		return 0;
   3012	}
   3013
   3014	if (test_bit(KVM_APIC_INIT, &pe)) {
   3015		clear_bit(KVM_APIC_INIT, &apic->pending_events);
   3016		kvm_vcpu_reset(vcpu, true);
   3017		if (kvm_vcpu_is_bsp(apic->vcpu))
   3018			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
   3019		else
   3020			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
   3021	}
   3022	if (test_bit(KVM_APIC_SIPI, &pe)) {
   3023		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
   3024		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
   3025			/* evaluate pending_events before reading the vector */
   3026			smp_rmb();
   3027			sipi_vector = apic->sipi_vector;
   3028			static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
   3029			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
   3030		}
   3031	}
   3032	return 0;
   3033}
   3034
   3035void kvm_lapic_exit(void)
   3036{
   3037	static_key_deferred_flush(&apic_hw_disabled);
   3038	WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
   3039	static_key_deferred_flush(&apic_sw_disabled);
   3040	WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
   3041}