summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/mmu.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-24 11:58:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-24 11:58:57 -0700
commit1ebdbeb03efe89f01f15df038a589077df3d21f5 (patch)
tree06b6b7bb565668d136c060c5104481e48cbf71e2 /arch/x86/kvm/mmu.h
parentefee6c79298fd823c569d501d041de85caa102a6 (diff)
parentc9b8fecddb5bb4b67e351bbaeaa648a6f7456912 (diff)
downloadcachepc-linux-1ebdbeb03efe89f01f15df038a589077df3d21f5.tar.gz
cachepc-linux-1ebdbeb03efe89f01f15df038a589077df3d21f5.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM: - Proper emulation of the OSLock feature of the debug architecture - Scalibility improvements for the MMU lock when dirty logging is on - New VMID allocator, which will eventually help with SVA in VMs - Better support for PMUs in heterogenous systems - PSCI 1.1 support, enabling support for SYSTEM_RESET2 - Implement CONFIG_DEBUG_LIST at EL2 - Make CONFIG_ARM64_ERRATUM_2077057 default y - Reduce the overhead of VM exit when no interrupt is pending - Remove traces of 32bit ARM host support from the documentation - Updated vgic selftests - Various cleanups, doc updates and spelling fixes RISC-V: - Prevent KVM_COMPAT from being selected - Optimize __kvm_riscv_switch_to() implementation - RISC-V SBI v0.3 support s390: - memop selftest - fix SCK locking - adapter interruptions virtualization for secure guests - add Claudio Imbrenda as maintainer - first step to do proper storage key checking x86: - Continue switching kvm_x86_ops to static_call(); introduce static_call_cond() and __static_call_ret0 when applicable. - Cleanup unused arguments in several functions - Synthesize AMD 0x80000021 leaf - Fixes and optimization for Hyper-V sparse-bank hypercalls - Implement Hyper-V's enlightened MSR bitmap for nested SVM - Remove MMU auditing - Eager splitting of page tables (new aka "TDP" MMU only) when dirty page tracking is enabled - Cleanup the implementation of the guest PGD cache - Preparation for the implementation of Intel IPI virtualization - Fix some segment descriptor checks in the emulator - Allow AMD AVIC support on systems with physical APIC ID above 255 - Better API to disable virtualization quirks - Fixes and optimizations for the zapping of page tables: - Zap roots in two passes, avoiding RCU read-side critical sections that last too long for very large guests backed by 4 KiB SPTEs. - Zap invalid and defunct roots asynchronously via concurrency-managed work queue. - Allowing yielding when zapping TDP MMU roots in response to the root's last reference being put. - Batch more TLB flushes with an RCU trick. Whoever frees the paging structure now holds RCU as a proxy for all vCPUs running in the guest, i.e. to prolongs the grace period on their behalf. It then kicks the the vCPUs out of guest mode before doing rcu_read_unlock(). Generic: - Introduce __vcalloc and use it for very large allocations that need memcg accounting" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (246 commits) KVM: use kvcalloc for array allocations KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2 kvm: x86: Require const tsc for RT KVM: x86: synthesize CPUID leaf 0x80000021h if useful KVM: x86: add support for CPUID leaf 0x80000021 KVM: x86: do not use KVM_X86_OP_OPTIONAL_RET0 for get_mt_mask Revert "KVM: x86/mmu: Zap only TDP MMU leafs in kvm_zap_gfn_range()" kvm: x86/mmu: Flush TLB before zap_gfn_range releases RCU KVM: arm64: fix typos in comments KVM: arm64: Generalise VM features into a set of flags KVM: s390: selftests: Add error memop tests KVM: s390: selftests: Add more copy memop tests KVM: s390: selftests: Add named stages for memop test KVM: s390: selftests: Add macro as abstraction for MEM_OP KVM: s390: selftests: Split memop tests KVM: s390x: fix SCK locking RISC-V: KVM: Implement SBI HSM suspend call RISC-V: KVM: Add common kvm_riscv_vcpu_wfi() function RISC-V: Add SBI HSM suspend related defines RISC-V: KVM: Implement SBI v0.3 SRST extension ...
Diffstat (limited to 'arch/x86/kvm/mmu.h')
-rw-r--r--arch/x86/kvm/mmu.h44
1 files changed, 4 insertions, 40 deletions
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e9fbb2c8bbe2..bf8dbc4bb12a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -48,6 +48,7 @@
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX)
static __always_inline u64 rsvd_bits(int s, int e)
{
@@ -79,12 +80,13 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
- if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
+ if (likely(vcpu->arch.mmu->root.hpa != INVALID_PAGE))
return 0;
return kvm_mmu_load(vcpu);
@@ -106,7 +108,7 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
- u64 root_hpa = vcpu->arch.mmu->root_hpa;
+ u64 root_hpa = vcpu->arch.mmu->root.hpa;
if (!VALID_PAGE(root_hpa))
return;
@@ -203,44 +205,6 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
}
/*
- * Currently, we have two sorts of write-protection, a) the first one
- * write-protects guest page to sync the guest modification, b) another one is
- * used to sync dirty bitmap when we do KVM_GET_DIRTY_LOG. The differences
- * between these two sorts are:
- * 1) the first case clears MMU-writable bit.
- * 2) the first case requires flushing tlb immediately avoiding corrupting
- * shadow page table between all vcpus so it should be in the protection of
- * mmu-lock. And the another case does not need to flush tlb until returning
- * the dirty bitmap to userspace since it only write-protects the page
- * logged in the bitmap, that means the page in the dirty bitmap is not
- * missed, so it can flush tlb out of mmu-lock.
- *
- * So, there is the problem: the first case can meet the corrupted tlb caused
- * by another case which write-protects pages but without flush tlb
- * immediately. In order to making the first case be aware this problem we let
- * it flush tlb if we try to write-protect a spte whose MMU-writable bit
- * is set, it works since another case never touches MMU-writable bit.
- *
- * Anyway, whenever a spte is updated (only permission and status bits are
- * changed) we need to check whether the spte with MMU-writable becomes
- * readonly, if that happens, we need to flush tlb. Fortunately,
- * mmu_spte_update() has already handled it perfectly.
- *
- * The rules to use MMU-writable and PT_WRITABLE_MASK:
- * - if we want to see if it has writable tlb entry or if the spte can be
- * writable on the mmu mapping, check MMU-writable, this is the most
- * case, otherwise
- * - if we fix page fault on the spte or do write-protection by dirty logging,
- * check PT_WRITABLE_MASK.
- *
- * TODO: introduce APIs to split these two cases.
- */
-static inline bool is_writable_pte(unsigned long pte)
-{
- return pte & PT_WRITABLE_MASK;
-}
-
-/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
* access rights (in ACC_* format).