cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

nested.c (47356B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Kernel-based Virtual Machine driver for Linux
      4 *
      5 * AMD SVM support
      6 *
      7 * Copyright (C) 2006 Qumranet, Inc.
      8 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
      9 *
     10 * Authors:
     11 *   Yaniv Kamay  <yaniv@qumranet.com>
     12 *   Avi Kivity   <avi@qumranet.com>
     13 */
     14
     15#define pr_fmt(fmt) "SVM: " fmt
     16
     17#include <linux/kvm_types.h>
     18#include <linux/kvm_host.h>
     19#include <linux/kernel.h>
     20
     21#include <asm/msr-index.h>
     22#include <asm/debugreg.h>
     23
     24#include "kvm_emulate.h"
     25#include "trace.h"
     26#include "mmu.h"
     27#include "x86.h"
     28#include "cpuid.h"
     29#include "lapic.h"
     30#include "svm.h"
     31#include "hyperv.h"
     32
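/*
 * CC ("consistency check") evaluates its condition and, when the check
 * fails, logs the failing expression so that rejected nested VMRUN or
 * nested-state input can be diagnosed (see the definition of
 * KVM_NESTED_VMENTER_CONSISTENCY_CHECK).
 */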
     33#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
     34
     35static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
     36				       struct x86_exception *fault)
     37{
     38	struct vcpu_svm *svm = to_svm(vcpu);
     39	struct vmcb *vmcb = svm->vmcb;
     40
     41	if (vmcb->control.exit_code != SVM_EXIT_NPF) {
     42		/*
     43		 * TODO: track the cause of the nested page fault, and
     44		 * correctly fill in the high bits of exit_info_1.
     45		 */
     46		vmcb->control.exit_code = SVM_EXIT_NPF;
     47		vmcb->control.exit_code_hi = 0;
     48		vmcb->control.exit_info_1 = (1ULL << 32);
     49		vmcb->control.exit_info_2 = fault->address;
     50	}
     51
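	/* The low 32 bits of EXITINFO1 hold the #PF-style error code. */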
     52	vmcb->control.exit_info_1 &= ~0xffffffffULL;
     53	vmcb->control.exit_info_1 |= fault->error_code;
     54
     55	nested_svm_vmexit(svm);
     56}
     57
     58static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
     59						    struct x86_exception *fault)
     60{
     61	struct vcpu_svm *svm = to_svm(vcpu);
     62	struct vmcb *vmcb = svm->vmcb;
     63
      64	WARN_ON(!is_guest_mode(vcpu));
     65
     66	if (vmcb12_is_intercept(&svm->nested.ctl,
     67				INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
     68	    !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
      69		vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
     70		vmcb->control.exit_code_hi = 0;
     71		vmcb->control.exit_info_1 = fault->error_code;
     72		vmcb->control.exit_info_2 = fault->address;
     73		nested_svm_vmexit(svm);
     74		return true;
     75	}
     76
     77	return false;
     78}
     79
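/*
 * Read the index-th PDPTE from L1's nested page table root (nCR3); used by
 * the shadow NPT MMU when L1's nested page tables are in PAE format.
 */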
     80static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
     81{
     82	struct vcpu_svm *svm = to_svm(vcpu);
     83	u64 cr3 = svm->nested.ctl.nested_cr3;
     84	u64 pdpte;
     85	int ret;
     86
     87	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
     88				       offset_in_page(cr3) + index * 8, 8);
     89	if (ret)
     90		return 0;
     91	return pdpte;
     92}
     93
     94static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
     95{
     96	struct vcpu_svm *svm = to_svm(vcpu);
     97
     98	return svm->nested.ctl.nested_cr3;
     99}
    100
    101static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
    102{
    103	struct vcpu_svm *svm = to_svm(vcpu);
    104
    105	WARN_ON(mmu_is_nested(vcpu));
    106
    107	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
    108
    109	/*
    110	 * The NPT format depends on L1's CR4 and EFER, which is in vmcb01.  Note,
    111	 * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
    112	 * vCPU state.  CR0.WP is explicitly ignored, while CR0.PG is required.
    113	 */
    114	kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
    115				svm->vmcb01.ptr->save.efer,
    116				svm->nested.ctl.nested_cr3);
    117	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
    118	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
    119	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
    120	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
    121}
    122
    123static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
    124{
    125	vcpu->arch.mmu = &vcpu->arch.root_mmu;
    126	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
    127}
    128
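/*
 * Virtual VMLOAD/VMSAVE may be left enabled for L2 only when the feature is
 * exposed to L1, nested NPT is in use, and L1 itself enabled it in vmcb12;
 * in every other case the instructions must be intercepted.
 */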
    129static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
    130{
    131	if (!svm->v_vmload_vmsave_enabled)
    132		return true;
    133
    134	if (!nested_npt_enabled(svm))
    135		return true;
    136
    137	if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
    138		return true;
    139
    140	return false;
    141}
    142
    143void recalc_intercepts(struct vcpu_svm *svm)
    144{
    145	struct vmcb_control_area *c, *h;
    146	struct vmcb_ctrl_area_cached *g;
    147	unsigned int i;
    148
    149	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
    150
    151	if (!is_guest_mode(&svm->vcpu))
    152		return;
    153
    154	c = &svm->vmcb->control;
    155	h = &svm->vmcb01.ptr->control;
    156	g = &svm->nested.ctl;
    157
    158	for (i = 0; i < MAX_INTERCEPT; i++)
    159		c->intercepts[i] = h->intercepts[i];
    160
    161	if (g->int_ctl & V_INTR_MASKING_MASK) {
    162		/* We only want the cr8 intercept bits of L1 */
    163		vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
    164		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
    165
    166		/*
    167		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
    168		 * affect any interrupt we may want to inject; therefore,
    169		 * interrupt window vmexits are irrelevant to L0.
    170		 */
    171		vmcb_clr_intercept(c, INTERCEPT_VINTR);
    172	}
    173
    174	/* We don't want to see VMMCALLs from a nested guest */
    175	vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
    176
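	/*
	 * The active intercept vector is the union of L0's intercepts from
	 * vmcb01 and L1's intercepts from vmcb12: an event exits to L0 if
	 * either hypervisor wants to intercept it.
	 */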
    177	for (i = 0; i < MAX_INTERCEPT; i++)
    178		c->intercepts[i] |= g->intercepts[i];
    179
     180	/* If SMI is not intercepted, ignore guest SMI intercept as well */
    181	if (!intercept_smi)
    182		vmcb_clr_intercept(c, INTERCEPT_SMI);
    183
    184	if (nested_vmcb_needs_vls_intercept(svm)) {
    185		/*
    186		 * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
    187		 * we must intercept these instructions to correctly
    188		 * emulate them in case L1 doesn't intercept them.
    189		 */
    190		vmcb_set_intercept(c, INTERCEPT_VMLOAD);
    191		vmcb_set_intercept(c, INTERCEPT_VMSAVE);
    192	} else {
    193		WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
    194	}
    195}
    196
    197/*
    198 * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function
    199 * is optimized in that it only merges the parts where KVM MSR permission bitmap
    200 * may contain zero bits.
    201 */
    202static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
    203{
    204	struct hv_enlightenments *hve =
    205		(struct hv_enlightenments *)svm->nested.ctl.reserved_sw;
    206	int i;
    207
    208	/*
    209	 * MSR bitmap update can be skipped when:
    210	 * - MSR bitmap for L1 hasn't changed.
    211	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
    212	 *   before.
    213	 * - Nested hypervisor (L1) is using Hyper-V emulation interface and
     214	 *   tells KVM (L0) there were no changes in MSR bitmap for L2.
    215	 */
    216	if (!svm->nested.force_msr_bitmap_recalc &&
    217	    kvm_hv_hypercall_enabled(&svm->vcpu) &&
    218	    hve->hv_enlightenments_control.msr_bitmap &&
    219	    (svm->nested.ctl.clean & BIT(VMCB_HV_NESTED_ENLIGHTENMENTS)))
    220		goto set_msrpm_base_pa;
    221
    222	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
    223		return true;
    224
    225	for (i = 0; i < MSRPM_OFFSETS; i++) {
    226		u32 value, p;
    227		u64 offset;
    228
    229		if (msrpm_offsets[i] == 0xffffffff)
    230			break;
    231
    232		p      = msrpm_offsets[i];
    233		offset = svm->nested.ctl.msrpm_base_pa + (p * 4);
    234
    235		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
    236			return false;
    237
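		/*
		 * A set bit means "intercept", so the merged bitmap
		 * intercepts an MSR if either L0 or L1 wants it intercepted.
		 */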
    238		svm->nested.msrpm[p] = svm->msrpm[p] | value;
    239	}
    240
    241	svm->nested.force_msr_bitmap_recalc = false;
    242
    243set_msrpm_base_pa:
    244	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
    245
    246	return true;
    247}
    248
    249/*
    250 * Bits 11:0 of bitmap address are ignored by hardware
    251 */
    252static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
    253{
    254	u64 addr = PAGE_ALIGN(pa);
    255
    256	return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
    257	    kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
    258}
    259
    260static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
    261{
    262	/* Nested FLUSHBYASID is not supported yet.  */
    263	switch(tlb_ctl) {
    264		case TLB_CONTROL_DO_NOTHING:
    265		case TLB_CONTROL_FLUSH_ALL_ASID:
    266			return true;
    267		default:
    268			return false;
    269	}
    270}
    271
    272static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
    273					 struct vmcb_ctrl_area_cached *control)
    274{
    275	if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
    276		return false;
    277
    278	if (CC(control->asid == 0))
    279		return false;
    280
    281	if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
    282		return false;
    283
    284	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
    285					   MSRPM_SIZE)))
    286		return false;
    287	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
    288					   IOPM_SIZE)))
    289		return false;
    290
    291	if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
    292		return false;
    293
    294	return true;
    295}
    296
    297/* Common checks that apply to both L1 and L2 state.  */
    298static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
    299				     struct vmcb_save_area_cached *save)
    300{
    301	if (CC(!(save->efer & EFER_SVME)))
    302		return false;
    303
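	/* CR0.NW requires CR0.CD, and CR0 bits 63:32 are reserved. */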
    304	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
    305	    CC(save->cr0 & ~0xffffffffULL))
    306		return false;
    307
    308	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
    309		return false;
    310
    311	/*
    312	 * These checks are also performed by KVM_SET_SREGS,
    313	 * except that EFER.LMA is not checked by SVM against
    314	 * CR0.PG && EFER.LME.
    315	 */
    316	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
    317		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
    318		    CC(!(save->cr0 & X86_CR0_PE)) ||
    319		    CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
    320			return false;
    321	}
    322
    323	if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
    324		return false;
    325
    326	if (CC(!kvm_valid_efer(vcpu, save->efer)))
    327		return false;
    328
    329	return true;
    330}
    331
    332static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
    333{
    334	struct vcpu_svm *svm = to_svm(vcpu);
    335	struct vmcb_save_area_cached *save = &svm->nested.save;
    336
    337	return __nested_vmcb_check_save(vcpu, save);
    338}
    339
    340static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
    341{
    342	struct vcpu_svm *svm = to_svm(vcpu);
    343	struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
    344
    345	return __nested_vmcb_check_controls(vcpu, ctl);
    346}
    347
    348static
    349void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
    350					 struct vmcb_ctrl_area_cached *to,
    351					 struct vmcb_control_area *from)
    352{
    353	unsigned int i;
    354
    355	for (i = 0; i < MAX_INTERCEPT; i++)
    356		to->intercepts[i] = from->intercepts[i];
    357
    358	to->iopm_base_pa        = from->iopm_base_pa;
    359	to->msrpm_base_pa       = from->msrpm_base_pa;
    360	to->tsc_offset          = from->tsc_offset;
    361	to->tlb_ctl             = from->tlb_ctl;
    362	to->int_ctl             = from->int_ctl;
    363	to->int_vector          = from->int_vector;
    364	to->int_state           = from->int_state;
    365	to->exit_code           = from->exit_code;
    366	to->exit_code_hi        = from->exit_code_hi;
    367	to->exit_info_1         = from->exit_info_1;
    368	to->exit_info_2         = from->exit_info_2;
    369	to->exit_int_info       = from->exit_int_info;
    370	to->exit_int_info_err   = from->exit_int_info_err;
    371	to->nested_ctl          = from->nested_ctl;
    372	to->event_inj           = from->event_inj;
    373	to->event_inj_err       = from->event_inj_err;
    374	to->nested_cr3          = from->nested_cr3;
    375	to->virt_ext            = from->virt_ext;
    376	to->pause_filter_count  = from->pause_filter_count;
    377	to->pause_filter_thresh = from->pause_filter_thresh;
    378
    379	/* Copy asid here because nested_vmcb_check_controls will check it.  */
    380	to->asid           = from->asid;
    381	to->msrpm_base_pa &= ~0x0fffULL;
    382	to->iopm_base_pa  &= ~0x0fffULL;
    383
    384	/* Hyper-V extensions (Enlightened VMCB) */
    385	if (kvm_hv_hypercall_enabled(vcpu)) {
    386		to->clean = from->clean;
    387		memcpy(to->reserved_sw, from->reserved_sw,
    388		       sizeof(struct hv_enlightenments));
    389	}
    390}
    391
    392void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
    393				       struct vmcb_control_area *control)
    394{
    395	__nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control);
    396}
    397
    398static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
    399					     struct vmcb_save_area *from)
    400{
    401	/*
    402	 * Copy only fields that are validated, as we need them
     403	 * to avoid TOCTOU (time-of-check to time-of-use) races.
    404	 */
    405	to->efer = from->efer;
    406	to->cr0 = from->cr0;
    407	to->cr3 = from->cr3;
    408	to->cr4 = from->cr4;
    409
    410	to->dr6 = from->dr6;
    411	to->dr7 = from->dr7;
    412}
    413
    414void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
    415				    struct vmcb_save_area *save)
    416{
    417	__nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
    418}
    419
    420/*
    421 * Synchronize fields that are written by the processor, so that
    422 * they can be copied back into the vmcb12.
    423 */
    424void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
    425{
    426	u32 mask;
    427	svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
    428	svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
    429
    430	/* Only a few fields of int_ctl are written by the processor.  */
    431	mask = V_IRQ_MASK | V_TPR_MASK;
    432	if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
    433	    svm_is_intercept(svm, INTERCEPT_VINTR)) {
    434		/*
    435		 * In order to request an interrupt window, L0 is usurping
    436		 * svm->vmcb->control.int_ctl and possibly setting V_IRQ
    437		 * even if it was clear in L1's VMCB.  Restoring it would be
    438		 * wrong.  However, in this case V_IRQ will remain true until
    439		 * interrupt_window_interception calls svm_clear_vintr and
    440		 * restores int_ctl.  We can just leave it aside.
    441		 */
    442		mask &= ~V_IRQ_MASK;
    443	}
    444
    445	if (nested_vgif_enabled(svm))
    446		mask |= V_GIF_MASK;
    447
    448	svm->nested.ctl.int_ctl        &= ~mask;
    449	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
    450}
    451
    452/*
    453 * Transfer any event that L0 or L1 wanted to inject into L2 to
    454 * EXIT_INT_INFO.
    455 */
    456static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
    457						struct vmcb *vmcb12)
    458{
    459	struct kvm_vcpu *vcpu = &svm->vcpu;
    460	u32 exit_int_info = 0;
    461	unsigned int nr;
    462
    463	if (vcpu->arch.exception.injected) {
    464		nr = vcpu->arch.exception.nr;
    465		exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
    466
    467		if (vcpu->arch.exception.has_error_code) {
    468			exit_int_info |= SVM_EVTINJ_VALID_ERR;
    469			vmcb12->control.exit_int_info_err =
    470				vcpu->arch.exception.error_code;
    471		}
    472
    473	} else if (vcpu->arch.nmi_injected) {
    474		exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
    475
    476	} else if (vcpu->arch.interrupt.injected) {
    477		nr = vcpu->arch.interrupt.nr;
    478		exit_int_info = nr | SVM_EVTINJ_VALID;
    479
    480		if (vcpu->arch.interrupt.soft)
    481			exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
    482		else
    483			exit_int_info |= SVM_EVTINJ_TYPE_INTR;
    484	}
    485
    486	vmcb12->control.exit_int_info = exit_int_info;
    487}
    488
    489static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
    490{
    491	/*
    492	 * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
    493	 * things to fix before this can be conditional:
    494	 *
    495	 *  - Flush TLBs for both L1 and L2 remote TLB flush
    496	 *  - Honor L1's request to flush an ASID on nested VMRUN
    497	 *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
    498	 *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
    499	 *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
    500	 *
    501	 * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
    502	 *     NPT guest-physical mappings on VMRUN.
    503	 */
    504	kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
    505	kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
    506}
    507
    508/*
    509 * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
    510 * if we are emulating VM-Entry into a guest with NPT enabled.
    511 */
    512static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
    513			       bool nested_npt, bool reload_pdptrs)
    514{
    515	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
    516		return -EINVAL;
    517
    518	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
    519	    CC(!load_pdptrs(vcpu, cr3)))
    520		return -EINVAL;
    521
    522	vcpu->arch.cr3 = cr3;
    523
    524	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
    525	kvm_init_mmu(vcpu);
    526
    527	if (!nested_npt)
    528		kvm_mmu_new_pgd(vcpu, cr3);
    529
    530	return 0;
    531}
    532
    533void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
    534{
    535	if (!svm->nested.vmcb02.ptr)
    536		return;
    537
    538	/* FIXME: merge g_pat from vmcb01 and vmcb12.  */
    539	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
    540}
    541
    542static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
    543{
    544	bool new_vmcb12 = false;
    545	struct vmcb *vmcb01 = svm->vmcb01.ptr;
    546	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
    547
    548	nested_vmcb02_compute_g_pat(svm);
    549
    550	/* Load the nested guest state */
    551	if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
    552		new_vmcb12 = true;
    553		svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
    554		svm->nested.force_msr_bitmap_recalc = true;
    555	}
    556
    557	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
    558		vmcb02->save.es = vmcb12->save.es;
    559		vmcb02->save.cs = vmcb12->save.cs;
    560		vmcb02->save.ss = vmcb12->save.ss;
    561		vmcb02->save.ds = vmcb12->save.ds;
    562		vmcb02->save.cpl = vmcb12->save.cpl;
    563		vmcb_mark_dirty(vmcb02, VMCB_SEG);
    564	}
    565
    566	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
    567		vmcb02->save.gdtr = vmcb12->save.gdtr;
    568		vmcb02->save.idtr = vmcb12->save.idtr;
    569		vmcb_mark_dirty(vmcb02, VMCB_DT);
    570	}
    571
    572	kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
    573
    574	svm_set_efer(&svm->vcpu, svm->nested.save.efer);
    575
    576	svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
    577	svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
    578
    579	svm->vcpu.arch.cr2 = vmcb12->save.cr2;
    580
    581	kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
    582	kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
    583	kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
    584
    585	/* In case we don't even reach vcpu_run, the fields are not updated */
    586	vmcb02->save.rax = vmcb12->save.rax;
    587	vmcb02->save.rsp = vmcb12->save.rsp;
    588	vmcb02->save.rip = vmcb12->save.rip;
    589
     590	/* These bits will be set properly on the first execution when new_vmcb12 is true */
    591	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
    592		vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
    593		svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
    594		vmcb_mark_dirty(vmcb02, VMCB_DR);
    595	}
    596
    597	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
    598		/*
    599		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
    600		 * svm_set_msr's definition of reserved bits.
    601		 */
    602		svm_copy_lbrs(vmcb02, vmcb12);
    603		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
    604		svm_update_lbrv(&svm->vcpu);
    605
    606	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
    607		svm_copy_lbrs(vmcb02, vmcb01);
    608	}
    609}
    610
    611static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
    612{
    613	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
    614	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
    615
    616	struct kvm_vcpu *vcpu = &svm->vcpu;
    617	struct vmcb *vmcb01 = svm->vmcb01.ptr;
    618	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
    619	u32 pause_count12;
    620	u32 pause_thresh12;
    621
    622	/*
    623	 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
    624	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
    625	 */
    626
    627	if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
    628		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
    629	else
    630		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
    631
    632	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
    633	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
    634	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
    635	vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
    636
    637	/* Done at vmrun: asid.  */
    638
    639	/* Also overwritten later if necessary.  */
    640	vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
    641
    642	/* nested_cr3.  */
    643	if (nested_npt_enabled(svm))
    644		nested_svm_init_mmu_context(vcpu);
    645
    646	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
    647			vcpu->arch.l1_tsc_offset,
    648			svm->nested.ctl.tsc_offset,
    649			svm->tsc_ratio_msr);
    650
    651	vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
    652
    653	if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
    654		WARN_ON(!svm->tsc_scaling_enabled);
    655		nested_svm_update_tsc_ratio_msr(vcpu);
    656	}
    657
    658	vmcb02->control.int_ctl             =
    659		(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
    660		(vmcb01->control.int_ctl & int_ctl_vmcb01_bits);
    661
    662	vmcb02->control.int_vector          = svm->nested.ctl.int_vector;
    663	vmcb02->control.int_state           = svm->nested.ctl.int_state;
    664	vmcb02->control.event_inj           = svm->nested.ctl.event_inj;
    665	vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
    666
    667	vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
    668					      LBR_CTL_ENABLE_MASK;
    669	if (svm->lbrv_enabled)
    670		vmcb02->control.virt_ext  |=
    671			(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
    672
    673	if (!nested_vmcb_needs_vls_intercept(svm))
    674		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
    675
    676	pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
    677	pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
    678	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
    679		/* use guest values since host doesn't intercept PAUSE */
    680		vmcb02->control.pause_filter_count = pause_count12;
    681		vmcb02->control.pause_filter_thresh = pause_thresh12;
    682
    683	} else {
    684		/* start from host values otherwise */
    685		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
    686		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
    687
    688		/* ... but ensure filtering is disabled if so requested.  */
    689		if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
    690			if (!pause_count12)
    691				vmcb02->control.pause_filter_count = 0;
    692			if (!pause_thresh12)
    693				vmcb02->control.pause_filter_thresh = 0;
    694		}
    695	}
    696
    697	nested_svm_transition_tlb_flush(vcpu);
    698
    699	/* Enter Guest-Mode */
    700	enter_guest_mode(vcpu);
    701
    702	/*
    703	 * Merge guest and host intercepts - must be called with vcpu in
    704	 * guest-mode to take effect.
    705	 */
    706	recalc_intercepts(svm);
    707}
    708
    709static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
    710{
    711	/*
    712	 * Some VMCB state is shared between L1 and L2 and thus has to be
    713	 * moved at the time of nested vmrun and vmexit.
    714	 *
    715	 * VMLOAD/VMSAVE state would also belong in this category, but KVM
    716	 * always performs VMLOAD and VMSAVE from the VMCB01.
    717	 */
    718	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
    719}
    720
    721int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
    722			 struct vmcb *vmcb12, bool from_vmrun)
    723{
    724	struct vcpu_svm *svm = to_svm(vcpu);
    725	int ret;
    726
    727	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
    728			       vmcb12->save.rip,
    729			       vmcb12->control.int_ctl,
    730			       vmcb12->control.event_inj,
    731			       vmcb12->control.nested_ctl);
    732
    733	trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
    734				    vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
    735				    vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
    736				    vmcb12->control.intercepts[INTERCEPT_WORD3],
    737				    vmcb12->control.intercepts[INTERCEPT_WORD4],
    738				    vmcb12->control.intercepts[INTERCEPT_WORD5]);
    739
    740
    741	svm->nested.vmcb12_gpa = vmcb12_gpa;
    742
    743	WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
    744
    745	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
    746
    747	svm_switch_vmcb(svm, &svm->nested.vmcb02);
    748	nested_vmcb02_prepare_control(svm);
    749	nested_vmcb02_prepare_save(svm, vmcb12);
    750
    751	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
    752				  nested_npt_enabled(svm), from_vmrun);
    753	if (ret)
    754		return ret;
    755
    756	if (!from_vmrun)
    757		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
    758
    759	svm_set_gif(svm, true);
    760
    761	if (kvm_vcpu_apicv_active(vcpu))
    762		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
    763
    764	return 0;
    765}
    766
    767int nested_svm_vmrun(struct kvm_vcpu *vcpu)
    768{
    769	struct vcpu_svm *svm = to_svm(vcpu);
    770	int ret;
    771	struct vmcb *vmcb12;
    772	struct kvm_host_map map;
    773	u64 vmcb12_gpa;
    774	struct vmcb *vmcb01 = svm->vmcb01.ptr;
    775
    776	if (!svm->nested.hsave_msr) {
    777		kvm_inject_gp(vcpu, 0);
    778		return 1;
    779	}
    780
    781	if (is_smm(vcpu)) {
    782		kvm_queue_exception(vcpu, UD_VECTOR);
    783		return 1;
    784	}
    785
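	/* VMRUN takes the guest-physical address of the VMCB to run in RAX. */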
    786	vmcb12_gpa = svm->vmcb->save.rax;
    787	ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
    788	if (ret == -EINVAL) {
    789		kvm_inject_gp(vcpu, 0);
    790		return 1;
    791	} else if (ret) {
    792		return kvm_skip_emulated_instruction(vcpu);
    793	}
    794
    795	ret = kvm_skip_emulated_instruction(vcpu);
    796
    797	vmcb12 = map.hva;
    798
    799	if (WARN_ON_ONCE(!svm->nested.initialized))
    800		return -EINVAL;
    801
    802	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
    803	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
    804
    805	if (!nested_vmcb_check_save(vcpu) ||
    806	    !nested_vmcb_check_controls(vcpu)) {
    807		vmcb12->control.exit_code    = SVM_EXIT_ERR;
    808		vmcb12->control.exit_code_hi = 0;
    809		vmcb12->control.exit_info_1  = 0;
    810		vmcb12->control.exit_info_2  = 0;
    811		goto out;
    812	}
    813
    814	/*
    815	 * Since vmcb01 is not in use, we can use it to store some of the L1
    816	 * state.
    817	 */
    818	vmcb01->save.efer   = vcpu->arch.efer;
    819	vmcb01->save.cr0    = kvm_read_cr0(vcpu);
    820	vmcb01->save.cr4    = vcpu->arch.cr4;
    821	vmcb01->save.rflags = kvm_get_rflags(vcpu);
    822	vmcb01->save.rip    = kvm_rip_read(vcpu);
    823
    824	if (!npt_enabled)
    825		vmcb01->save.cr3 = kvm_read_cr3(vcpu);
    826
    827	svm->nested.nested_run_pending = 1;
    828
    829	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
    830		goto out_exit_err;
    831
    832	if (nested_svm_vmrun_msrpm(svm))
    833		goto out;
    834
    835out_exit_err:
    836	svm->nested.nested_run_pending = 0;
    837
    838	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
    839	svm->vmcb->control.exit_code_hi = 0;
    840	svm->vmcb->control.exit_info_1  = 0;
    841	svm->vmcb->control.exit_info_2  = 0;
    842
    843	nested_svm_vmexit(svm);
    844
    845out:
    846	kvm_vcpu_unmap(vcpu, &map, true);
    847
    848	return ret;
    849}
    850
    851/* Copy state save area fields which are handled by VMRUN */
    852void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
    853			  struct vmcb_save_area *from_save)
    854{
    855	to_save->es = from_save->es;
    856	to_save->cs = from_save->cs;
    857	to_save->ss = from_save->ss;
    858	to_save->ds = from_save->ds;
    859	to_save->gdtr = from_save->gdtr;
    860	to_save->idtr = from_save->idtr;
    861	to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
    862	to_save->efer = from_save->efer;
    863	to_save->cr0 = from_save->cr0;
    864	to_save->cr3 = from_save->cr3;
    865	to_save->cr4 = from_save->cr4;
    866	to_save->rax = from_save->rax;
    867	to_save->rsp = from_save->rsp;
    868	to_save->rip = from_save->rip;
    869	to_save->cpl = 0;
    870}
    871
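/* Copy state save area fields which are handled by VMLOAD/VMSAVE */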
    872void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
    873{
    874	to_vmcb->save.fs = from_vmcb->save.fs;
    875	to_vmcb->save.gs = from_vmcb->save.gs;
    876	to_vmcb->save.tr = from_vmcb->save.tr;
    877	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
    878	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
    879	to_vmcb->save.star = from_vmcb->save.star;
    880	to_vmcb->save.lstar = from_vmcb->save.lstar;
    881	to_vmcb->save.cstar = from_vmcb->save.cstar;
    882	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
    883	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
    884	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
    885	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
    886}
    887
    888int nested_svm_vmexit(struct vcpu_svm *svm)
    889{
    890	struct kvm_vcpu *vcpu = &svm->vcpu;
    891	struct vmcb *vmcb01 = svm->vmcb01.ptr;
    892	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
    893	struct vmcb *vmcb12;
    894	struct kvm_host_map map;
    895	int rc;
    896
    897	rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
    898	if (rc) {
    899		if (rc == -EINVAL)
    900			kvm_inject_gp(vcpu, 0);
    901		return 1;
    902	}
    903
    904	vmcb12 = map.hva;
    905
    906	/* Exit Guest-Mode */
    907	leave_guest_mode(vcpu);
    908	svm->nested.vmcb12_gpa = 0;
    909	WARN_ON_ONCE(svm->nested.nested_run_pending);
    910
    911	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
    912
    913	/* in case we halted in L2 */
    914	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
    915
    916	/* Give the current vmcb to the guest */
    917
    918	vmcb12->save.es     = vmcb02->save.es;
    919	vmcb12->save.cs     = vmcb02->save.cs;
    920	vmcb12->save.ss     = vmcb02->save.ss;
    921	vmcb12->save.ds     = vmcb02->save.ds;
    922	vmcb12->save.gdtr   = vmcb02->save.gdtr;
    923	vmcb12->save.idtr   = vmcb02->save.idtr;
    924	vmcb12->save.efer   = svm->vcpu.arch.efer;
    925	vmcb12->save.cr0    = kvm_read_cr0(vcpu);
    926	vmcb12->save.cr3    = kvm_read_cr3(vcpu);
    927	vmcb12->save.cr2    = vmcb02->save.cr2;
    928	vmcb12->save.cr4    = svm->vcpu.arch.cr4;
    929	vmcb12->save.rflags = kvm_get_rflags(vcpu);
    930	vmcb12->save.rip    = kvm_rip_read(vcpu);
    931	vmcb12->save.rsp    = kvm_rsp_read(vcpu);
    932	vmcb12->save.rax    = kvm_rax_read(vcpu);
    933	vmcb12->save.dr7    = vmcb02->save.dr7;
    934	vmcb12->save.dr6    = svm->vcpu.arch.dr6;
    935	vmcb12->save.cpl    = vmcb02->save.cpl;
    936
    937	vmcb12->control.int_state         = vmcb02->control.int_state;
    938	vmcb12->control.exit_code         = vmcb02->control.exit_code;
    939	vmcb12->control.exit_code_hi      = vmcb02->control.exit_code_hi;
    940	vmcb12->control.exit_info_1       = vmcb02->control.exit_info_1;
    941	vmcb12->control.exit_info_2       = vmcb02->control.exit_info_2;
    942
    943	if (vmcb12->control.exit_code != SVM_EXIT_ERR)
    944		nested_save_pending_event_to_vmcb12(svm, vmcb12);
    945
    946	if (svm->nrips_enabled)
    947		vmcb12->control.next_rip  = vmcb02->control.next_rip;
    948
    949	vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
    950	vmcb12->control.tlb_ctl           = svm->nested.ctl.tlb_ctl;
    951	vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
    952	vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
    953
    954	if (!kvm_pause_in_guest(vcpu->kvm)) {
    955		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
    956		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
    957
    958	}
    959
    960	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
    961
    962	svm_switch_vmcb(svm, &svm->vmcb01);
    963
    964	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
    965		svm_copy_lbrs(vmcb12, vmcb02);
    966		svm_update_lbrv(vcpu);
    967	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
    968		svm_copy_lbrs(vmcb01, vmcb02);
    969		svm_update_lbrv(vcpu);
    970	}
    971
    972	/*
     973	 * On vmexit the GIF is set to false and
     974	 * no event can be injected in L1.
    975	 */
    976	svm_set_gif(svm, false);
    977	vmcb01->control.exit_int_info = 0;
    978
    979	svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
    980	if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
    981		vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
    982		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
    983	}
    984
    985	if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
    986		WARN_ON(!svm->tsc_scaling_enabled);
    987		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
    988		__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
    989	}
    990
    991	svm->nested.ctl.nested_cr3 = 0;
    992
    993	/*
    994	 * Restore processor state that had been saved in vmcb01
    995	 */
    996	kvm_set_rflags(vcpu, vmcb01->save.rflags);
    997	svm_set_efer(vcpu, vmcb01->save.efer);
    998	svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
    999	svm_set_cr4(vcpu, vmcb01->save.cr4);
   1000	kvm_rax_write(vcpu, vmcb01->save.rax);
   1001	kvm_rsp_write(vcpu, vmcb01->save.rsp);
   1002	kvm_rip_write(vcpu, vmcb01->save.rip);
   1003
   1004	svm->vcpu.arch.dr7 = DR7_FIXED_1;
   1005	kvm_update_dr7(&svm->vcpu);
   1006
   1007	trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
   1008				       vmcb12->control.exit_info_1,
   1009				       vmcb12->control.exit_info_2,
   1010				       vmcb12->control.exit_int_info,
   1011				       vmcb12->control.exit_int_info_err,
   1012				       KVM_ISA_SVM);
   1013
   1014	kvm_vcpu_unmap(vcpu, &map, true);
   1015
   1016	nested_svm_transition_tlb_flush(vcpu);
   1017
   1018	nested_svm_uninit_mmu_context(vcpu);
   1019
   1020	rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
   1021	if (rc)
   1022		return 1;
   1023
   1024	/*
   1025	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
   1026	 * doesn't end up in L1.
   1027	 */
   1028	svm->vcpu.arch.nmi_injected = false;
   1029	kvm_clear_exception_queue(vcpu);
   1030	kvm_clear_interrupt_queue(vcpu);
   1031
   1032	/*
   1033	 * If we are here following the completion of a VMRUN that
   1034	 * is being single-stepped, queue the pending #DB intercept
    1035	 * right now so that it can be accounted for before we execute
   1036	 * L1's next instruction.
   1037	 */
   1038	if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
   1039		kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
   1040
   1041	/*
   1042	 * Un-inhibit the AVIC right away, so that other vCPUs can start
   1043	 * to benefit from it right away.
   1044	 */
   1045	if (kvm_apicv_activated(vcpu->kvm))
   1046		kvm_vcpu_update_apicv(vcpu);
   1047
   1048	return 0;
   1049}
   1050
   1051static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
   1052{
   1053	nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
   1054}
   1055
   1056int svm_allocate_nested(struct vcpu_svm *svm)
   1057{
   1058	struct page *vmcb02_page;
   1059
   1060	if (svm->nested.initialized)
   1061		return 0;
   1062
   1063	vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
   1064	if (!vmcb02_page)
   1065		return -ENOMEM;
   1066	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
   1067	svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
   1068
   1069	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
   1070	if (!svm->nested.msrpm)
   1071		goto err_free_vmcb02;
   1072	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
   1073
   1074	svm->nested.initialized = true;
   1075	return 0;
   1076
   1077err_free_vmcb02:
   1078	__free_page(vmcb02_page);
   1079	return -ENOMEM;
   1080}
   1081
   1082void svm_free_nested(struct vcpu_svm *svm)
   1083{
   1084	if (!svm->nested.initialized)
   1085		return;
   1086
   1087	svm_vcpu_free_msrpm(svm->nested.msrpm);
   1088	svm->nested.msrpm = NULL;
   1089
   1090	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
   1091	svm->nested.vmcb02.ptr = NULL;
   1092
   1093	/*
   1094	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
   1095	 * some vmcb12 fields are not loaded if they are marked clean
   1096	 * in the vmcb12, since in this case they are up to date already.
   1097	 *
   1098	 * When the vmcb02 is freed, this optimization becomes invalid.
   1099	 */
   1100	svm->nested.last_vmcb12_gpa = INVALID_GPA;
   1101
   1102	svm->nested.initialized = false;
   1103}
   1104
   1105/*
   1106 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
   1107 */
   1108void svm_leave_nested(struct kvm_vcpu *vcpu)
   1109{
   1110	struct vcpu_svm *svm = to_svm(vcpu);
   1111
   1112	if (is_guest_mode(vcpu)) {
   1113		svm->nested.nested_run_pending = 0;
   1114		svm->nested.vmcb12_gpa = INVALID_GPA;
   1115
   1116		leave_guest_mode(vcpu);
   1117
   1118		svm_switch_vmcb(svm, &svm->vmcb01);
   1119
   1120		nested_svm_uninit_mmu_context(vcpu);
   1121		vmcb_mark_all_dirty(svm->vmcb);
   1122	}
   1123
   1124	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
   1125}
   1126
   1127static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
   1128{
   1129	u32 offset, msr, value;
   1130	int write, mask;
   1131
   1132	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
   1133		return NESTED_EXIT_HOST;
   1134
   1135	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
   1136	offset = svm_msrpm_offset(msr);
   1137	write  = svm->vmcb->control.exit_info_1 & 1;
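	/*
	 * Each u32 of the MSR permission bitmap covers 16 MSRs with two bits
	 * per MSR: the even bit intercepts reads, the odd bit intercepts
	 * writes.
	 */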
   1138	mask   = 1 << ((2 * (msr & 0xf)) + write);
   1139
   1140	if (offset == MSR_INVALID)
   1141		return NESTED_EXIT_DONE;
   1142
    1143	/* Offset is in 32 bit units but we need it in 8 bit (byte) units */
   1144	offset *= 4;
   1145
   1146	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
   1147		return NESTED_EXIT_DONE;
   1148
   1149	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
   1150}
   1151
   1152static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
   1153{
   1154	unsigned port, size, iopm_len;
   1155	u16 val, mask;
   1156	u8 start_bit;
   1157	u64 gpa;
   1158
   1159	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
   1160		return NESTED_EXIT_HOST;
   1161
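	/*
	 * The IOPM holds one bit per I/O port; an access of 'size' bytes
	 * covers 'size' consecutive bits and may straddle a byte boundary,
	 * in which case two bytes are read.  E.g. a 2-byte access to port
	 * 0x3f9 checks bits 1-2 of IOPM byte 0x7f (mask 0x6).
	 */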
   1162	port = svm->vmcb->control.exit_info_1 >> 16;
   1163	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
   1164		SVM_IOIO_SIZE_SHIFT;
   1165	gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
   1166	start_bit = port % 8;
   1167	iopm_len = (start_bit + size > 8) ? 2 : 1;
   1168	mask = (0xf >> (4 - size)) << start_bit;
   1169	val = 0;
   1170
   1171	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
   1172		return NESTED_EXIT_DONE;
   1173
   1174	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
   1175}
   1176
   1177static int nested_svm_intercept(struct vcpu_svm *svm)
   1178{
   1179	u32 exit_code = svm->vmcb->control.exit_code;
   1180	int vmexit = NESTED_EXIT_HOST;
   1181
   1182	switch (exit_code) {
   1183	case SVM_EXIT_MSR:
   1184		vmexit = nested_svm_exit_handled_msr(svm);
   1185		break;
   1186	case SVM_EXIT_IOIO:
   1187		vmexit = nested_svm_intercept_ioio(svm);
   1188		break;
   1189	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
   1190		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
   1191			vmexit = NESTED_EXIT_DONE;
   1192		break;
   1193	}
   1194	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
   1195		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
   1196			vmexit = NESTED_EXIT_DONE;
   1197		break;
   1198	}
   1199	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
   1200		/*
   1201		 * Host-intercepted exceptions have been checked already in
   1202		 * nested_svm_exit_special.  There is nothing to do here,
   1203		 * the vmexit is injected by svm_check_nested_events.
   1204		 */
   1205		vmexit = NESTED_EXIT_DONE;
   1206		break;
   1207	}
   1208	case SVM_EXIT_ERR: {
   1209		vmexit = NESTED_EXIT_DONE;
   1210		break;
   1211	}
   1212	default: {
   1213		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
   1214			vmexit = NESTED_EXIT_DONE;
   1215	}
   1216	}
   1217
   1218	return vmexit;
   1219}
   1220
   1221int nested_svm_exit_handled(struct vcpu_svm *svm)
   1222{
   1223	int vmexit;
   1224
   1225	vmexit = nested_svm_intercept(svm);
   1226
   1227	if (vmexit == NESTED_EXIT_DONE)
   1228		nested_svm_vmexit(svm);
   1229
   1230	return vmexit;
   1231}
   1232
   1233int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
   1234{
   1235	if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
   1236		kvm_queue_exception(vcpu, UD_VECTOR);
   1237		return 1;
   1238	}
   1239
   1240	if (to_svm(vcpu)->vmcb->save.cpl) {
   1241		kvm_inject_gp(vcpu, 0);
   1242		return 1;
   1243	}
   1244
   1245	return 0;
   1246}
   1247
   1248static bool nested_exit_on_exception(struct vcpu_svm *svm)
   1249{
   1250	unsigned int nr = svm->vcpu.arch.exception.nr;
   1251
   1252	return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr));
   1253}
   1254
   1255static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
   1256{
   1257	unsigned int nr = svm->vcpu.arch.exception.nr;
   1258	struct vmcb *vmcb = svm->vmcb;
   1259
   1260	vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
   1261	vmcb->control.exit_code_hi = 0;
   1262
   1263	if (svm->vcpu.arch.exception.has_error_code)
   1264		vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
   1265
   1266	/*
   1267	 * EXITINFO2 is undefined for all exception intercepts other
   1268	 * than #PF.
   1269	 */
   1270	if (nr == PF_VECTOR) {
   1271		if (svm->vcpu.arch.exception.nested_apf)
   1272			vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
   1273		else if (svm->vcpu.arch.exception.has_payload)
   1274			vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
   1275		else
   1276			vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
   1277	} else if (nr == DB_VECTOR) {
   1278		/* See inject_pending_event.  */
   1279		kvm_deliver_exception_payload(&svm->vcpu);
   1280		if (svm->vcpu.arch.dr7 & DR7_GD) {
   1281			svm->vcpu.arch.dr7 &= ~DR7_GD;
   1282			kvm_update_dr7(&svm->vcpu);
   1283		}
   1284	} else
   1285		WARN_ON(svm->vcpu.arch.exception.has_payload);
   1286
   1287	nested_svm_vmexit(svm);
   1288}
   1289
   1290static inline bool nested_exit_on_init(struct vcpu_svm *svm)
   1291{
   1292	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
   1293}
   1294
   1295static int svm_check_nested_events(struct kvm_vcpu *vcpu)
   1296{
   1297	struct vcpu_svm *svm = to_svm(vcpu);
   1298	bool block_nested_events =
   1299		kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
   1300	struct kvm_lapic *apic = vcpu->arch.apic;
   1301
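	/*
	 * Events are evaluated in priority order: INIT, pending exceptions,
	 * SMI, NMI and finally external interrupts.
	 */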
   1302	if (lapic_in_kernel(vcpu) &&
   1303	    test_bit(KVM_APIC_INIT, &apic->pending_events)) {
   1304		if (block_nested_events)
   1305			return -EBUSY;
   1306		if (!nested_exit_on_init(svm))
   1307			return 0;
   1308		nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
   1309		return 0;
   1310	}
   1311
   1312	if (vcpu->arch.exception.pending) {
   1313		/*
   1314		 * Only a pending nested run can block a pending exception.
   1315		 * Otherwise an injected NMI/interrupt should either be
   1316		 * lost or delivered to the nested hypervisor in the EXITINTINFO
   1317		 * vmcb field, while delivering the pending exception.
   1318		 */
   1319		if (svm->nested.nested_run_pending)
    1320			return -EBUSY;
   1321		if (!nested_exit_on_exception(svm))
   1322			return 0;
   1323		nested_svm_inject_exception_vmexit(svm);
   1324		return 0;
   1325	}
   1326
   1327	if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
   1328		if (block_nested_events)
   1329			return -EBUSY;
   1330		if (!nested_exit_on_smi(svm))
   1331			return 0;
   1332		nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
   1333		return 0;
   1334	}
   1335
   1336	if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
   1337		if (block_nested_events)
   1338			return -EBUSY;
   1339		if (!nested_exit_on_nmi(svm))
   1340			return 0;
   1341		nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
   1342		return 0;
   1343	}
   1344
   1345	if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
   1346		if (block_nested_events)
   1347			return -EBUSY;
   1348		if (!nested_exit_on_intr(svm))
   1349			return 0;
   1350		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
   1351		nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
   1352		return 0;
   1353	}
   1354
   1355	return 0;
   1356}
   1357
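/*
 * Decide whether an exit must be handled by L0 regardless of L1's intercepts
 * (NESTED_EXIT_HOST) or whether normal nested intercept processing should
 * continue (NESTED_EXIT_CONTINUE).
 */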
   1358int nested_svm_exit_special(struct vcpu_svm *svm)
   1359{
   1360	u32 exit_code = svm->vmcb->control.exit_code;
   1361
   1362	switch (exit_code) {
   1363	case SVM_EXIT_INTR:
   1364	case SVM_EXIT_NMI:
   1365	case SVM_EXIT_NPF:
   1366		return NESTED_EXIT_HOST;
   1367	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
   1368		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
   1369
   1370		if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
   1371		    excp_bits)
   1372			return NESTED_EXIT_HOST;
   1373		else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
   1374			 svm->vcpu.arch.apf.host_apf_flags)
   1375			/* Trap async PF even if not shadowing */
   1376			return NESTED_EXIT_HOST;
   1377		break;
   1378	}
   1379	default:
   1380		break;
   1381	}
   1382
   1383	return NESTED_EXIT_CONTINUE;
   1384}
   1385
   1386void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
   1387{
   1388	struct vcpu_svm *svm = to_svm(vcpu);
   1389
   1390	vcpu->arch.tsc_scaling_ratio =
   1391		kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
   1392					       svm->tsc_ratio_msr);
   1393	__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
   1394}
   1395
   1396/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
   1397static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
   1398					      struct vmcb_ctrl_area_cached *from)
   1399{
   1400	unsigned int i;
   1401
   1402	memset(dst, 0, sizeof(struct vmcb_control_area));
   1403
   1404	for (i = 0; i < MAX_INTERCEPT; i++)
   1405		dst->intercepts[i] = from->intercepts[i];
   1406
   1407	dst->iopm_base_pa         = from->iopm_base_pa;
   1408	dst->msrpm_base_pa        = from->msrpm_base_pa;
   1409	dst->tsc_offset           = from->tsc_offset;
   1410	dst->asid                 = from->asid;
   1411	dst->tlb_ctl              = from->tlb_ctl;
   1412	dst->int_ctl              = from->int_ctl;
   1413	dst->int_vector           = from->int_vector;
   1414	dst->int_state            = from->int_state;
   1415	dst->exit_code            = from->exit_code;
   1416	dst->exit_code_hi         = from->exit_code_hi;
   1417	dst->exit_info_1          = from->exit_info_1;
   1418	dst->exit_info_2          = from->exit_info_2;
   1419	dst->exit_int_info        = from->exit_int_info;
   1420	dst->exit_int_info_err    = from->exit_int_info_err;
   1421	dst->nested_ctl           = from->nested_ctl;
   1422	dst->event_inj            = from->event_inj;
   1423	dst->event_inj_err        = from->event_inj_err;
   1424	dst->nested_cr3           = from->nested_cr3;
   1425	dst->virt_ext              = from->virt_ext;
   1426	dst->pause_filter_count   = from->pause_filter_count;
   1427	dst->pause_filter_thresh  = from->pause_filter_thresh;
   1428	/* 'clean' and 'reserved_sw' are not changed by KVM */
   1429}
   1430
   1431static int svm_get_nested_state(struct kvm_vcpu *vcpu,
   1432				struct kvm_nested_state __user *user_kvm_nested_state,
   1433				u32 user_data_size)
   1434{
   1435	struct vcpu_svm *svm;
   1436	struct vmcb_control_area *ctl;
   1437	unsigned long r;
   1438	struct kvm_nested_state kvm_state = {
   1439		.flags = 0,
   1440		.format = KVM_STATE_NESTED_FORMAT_SVM,
   1441		.size = sizeof(kvm_state),
   1442	};
   1443	struct vmcb __user *user_vmcb = (struct vmcb __user *)
   1444		&user_kvm_nested_state->data.svm[0];
   1445
   1446	if (!vcpu)
   1447		return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
   1448
   1449	svm = to_svm(vcpu);
   1450
   1451	if (user_data_size < kvm_state.size)
   1452		goto out;
   1453
   1454	/* First fill in the header and copy it out.  */
   1455	if (is_guest_mode(vcpu)) {
   1456		kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
   1457		kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
   1458		kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
   1459
   1460		if (svm->nested.nested_run_pending)
   1461			kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
   1462	}
   1463
   1464	if (gif_set(svm))
   1465		kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
   1466
   1467	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
   1468		return -EFAULT;
   1469
   1470	if (!is_guest_mode(vcpu))
   1471		goto out;
   1472
   1473	/*
   1474	 * Copy over the full size of the VMCB rather than just the size
   1475	 * of the structs.
   1476	 */
   1477	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
   1478		return -EFAULT;
   1479
   1480	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
   1481	if (!ctl)
   1482		return -ENOMEM;
   1483
   1484	nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
   1485	r = copy_to_user(&user_vmcb->control, ctl,
   1486			 sizeof(user_vmcb->control));
   1487	kfree(ctl);
   1488	if (r)
   1489		return -EFAULT;
   1490
   1491	if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
   1492			 sizeof(user_vmcb->save)))
   1493		return -EFAULT;
   1494out:
   1495	return kvm_state.size;
   1496}
   1497
   1498static int svm_set_nested_state(struct kvm_vcpu *vcpu,
   1499				struct kvm_nested_state __user *user_kvm_nested_state,
   1500				struct kvm_nested_state *kvm_state)
   1501{
   1502	struct vcpu_svm *svm = to_svm(vcpu);
   1503	struct vmcb __user *user_vmcb = (struct vmcb __user *)
   1504		&user_kvm_nested_state->data.svm[0];
   1505	struct vmcb_control_area *ctl;
   1506	struct vmcb_save_area *save;
   1507	struct vmcb_save_area_cached save_cached;
   1508	struct vmcb_ctrl_area_cached ctl_cached;
   1509	unsigned long cr0;
   1510	int ret;
   1511
   1512	BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
   1513		     KVM_STATE_NESTED_SVM_VMCB_SIZE);
   1514
   1515	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
   1516		return -EINVAL;
   1517
   1518	if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
   1519				 KVM_STATE_NESTED_RUN_PENDING |
   1520				 KVM_STATE_NESTED_GIF_SET))
   1521		return -EINVAL;
   1522
   1523	/*
   1524	 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
   1525	 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
   1526	 */
   1527	if (!(vcpu->arch.efer & EFER_SVME)) {
   1528		/* GIF=1 and no guest mode are required if SVME=0.  */
   1529		if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
   1530			return -EINVAL;
   1531	}
   1532
   1533	/* SMM temporarily disables SVM, so we cannot be in guest mode.  */
   1534	if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
   1535		return -EINVAL;
   1536
   1537	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
   1538		svm_leave_nested(vcpu);
   1539		svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
   1540		return 0;
   1541	}
   1542
   1543	if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
   1544		return -EINVAL;
   1545	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
   1546		return -EINVAL;
   1547
   1548	ret  = -ENOMEM;
   1549	ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL_ACCOUNT);
   1550	save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
   1551	if (!ctl || !save)
   1552		goto out_free;
   1553
   1554	ret = -EFAULT;
   1555	if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
   1556		goto out_free;
   1557	if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
   1558		goto out_free;
   1559
   1560	ret = -EINVAL;
   1561	__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
   1562	if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
   1563		goto out_free;
   1564
   1565	/*
   1566	 * Processor state contains L2 state.  Check that it is
   1567	 * valid for guest mode (see nested_vmcb_check_save).
   1568	 */
   1569	cr0 = kvm_read_cr0(vcpu);
    1570	if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
   1571		goto out_free;
   1572
   1573	/*
   1574	 * Validate host state saved from before VMRUN (see
   1575	 * nested_svm_check_permissions).
   1576	 */
   1577	__nested_copy_vmcb_save_to_cache(&save_cached, save);
   1578	if (!(save->cr0 & X86_CR0_PG) ||
   1579	    !(save->cr0 & X86_CR0_PE) ||
   1580	    (save->rflags & X86_EFLAGS_VM) ||
   1581	    !__nested_vmcb_check_save(vcpu, &save_cached))
   1582		goto out_free;
   1583
   1584
   1585	/*
   1586	 * All checks done, we can enter guest mode. Userspace provides
   1587	 * vmcb12.control, which will be combined with L1 and stored into
   1588	 * vmcb02, and the L1 save state which we store in vmcb01.
   1589	 * L2 registers if needed are moved from the current VMCB to VMCB02.
   1590	 */
   1591
   1592	if (is_guest_mode(vcpu))
   1593		svm_leave_nested(vcpu);
   1594	else
   1595		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
   1596
   1597	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
   1598
   1599	svm->nested.nested_run_pending =
   1600		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
   1601
   1602	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
   1603
   1604	svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
   1605	nested_copy_vmcb_control_to_cache(svm, ctl);
   1606
   1607	svm_switch_vmcb(svm, &svm->nested.vmcb02);
   1608	nested_vmcb02_prepare_control(svm);
   1609
   1610	/*
   1611	 * While the nested guest CR3 is already checked and set by
   1612	 * KVM_SET_SREGS, it was set when nested state was yet loaded,
    1613	 * KVM_SET_SREGS, it was set before the nested state was loaded,
    1614	 * thus the MMU might not be initialized correctly.
   1615	 */
   1616
   1617	ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
   1618				  nested_npt_enabled(svm), false);
   1619	if (WARN_ON_ONCE(ret))
   1620		goto out_free;
   1621
   1622	svm->nested.force_msr_bitmap_recalc = true;
   1623
   1624	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
   1625	ret = 0;
   1626out_free:
   1627	kfree(save);
   1628	kfree(ctl);
   1629
   1630	return ret;
   1631}
   1632
   1633static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
   1634{
   1635	struct vcpu_svm *svm = to_svm(vcpu);
   1636
   1637	if (WARN_ON(!is_guest_mode(vcpu)))
   1638		return true;
   1639
   1640	if (!vcpu->arch.pdptrs_from_userspace &&
   1641	    !nested_npt_enabled(svm) && is_pae_paging(vcpu))
   1642		/*
   1643		 * Reload the guest's PDPTRs since after a migration
   1644		 * the guest CR3 might be restored prior to setting the nested
   1645		 * state which can lead to a load of wrong PDPTRs.
   1646		 */
   1647		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
   1648			return false;
   1649
   1650	if (!nested_svm_vmrun_msrpm(svm)) {
   1651		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
   1652		vcpu->run->internal.suberror =
   1653			KVM_INTERNAL_ERROR_EMULATION;
   1654		vcpu->run->internal.ndata = 0;
   1655		return false;
   1656	}
   1657
   1658	return true;
   1659}
   1660
   1661struct kvm_x86_nested_ops svm_nested_ops = {
   1662	.leave_nested = svm_leave_nested,
   1663	.check_events = svm_check_nested_events,
   1664	.handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
   1665	.triple_fault = nested_svm_triple_fault,
   1666	.get_nested_state_pages = svm_get_nested_state_pages,
   1667	.get_state = svm_get_nested_state,
   1668	.set_state = svm_set_nested_state,
   1669};