summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/mm/fault.c70
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--mm/memory.c13
4 files changed, 88 insertions, 1 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a4c270e99f7f..1c6879b8ee35 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -19,6 +19,7 @@
#include <linux/uaccess.h> /* faulthandler_disabled() */
#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/
#include <linux/mm_types.h>
+#include <linux/sev.h> /* snp_lookup_rmpentry() */
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/traps.h> /* dotraplinkage, ... */
@@ -1209,6 +1210,64 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
}
NOKPROBE_SYMBOL(do_kern_addr_fault);
+/* Number of PAGE_SIZE pages covered by a single mapping at page-table @level. */
+static inline size_t pages_per_hpage(int level)
+{
+	unsigned long bytes = page_level_size(level);
+
+	return bytes / PAGE_SIZE;
+}
+
+/*
+ * Try to resolve an RMP violation taken on a user address.
+ *
+ * The page tables rooted at the current CR3 are walked to find the entry
+ * mapping @address, the PFN of the faulting 4K page is derived from it and
+ * then looked up with snp_lookup_rmpentry().
+ *
+ * Return 1 if the caller should return from the fault handler (either the
+ * access will simply be retried, or SIGBUS was raised through do_sigbus()),
+ * 0 if the mapping needs to be split to a smaller page size in order to
+ * resolve the fault.
+ */
+static int handle_user_rmp_page_fault(struct pt_regs *regs, unsigned long error_code,
+				      unsigned long address)
+{
+	int rmp_level, level;
+	pgd_t *pgd;
+	pte_t *pte;
+	u64 pfn;
+
+	pgd = __va(read_cr3_pa());
+	pgd += pgd_index(address);
+
+	pte = lookup_address_in_pgd(pgd, address, &level);
+
+	/*
+	 * The mapping may be gone if there was a race between an unmap event
+	 * and the RMP fault delivery. Let the access be retried.
+	 */
+	if (!pte || !pte_present(*pte))
+		return 1;
+
+	pfn = pte_pfn(*pte);
+
+	/*
+	 * For a large mapping the entry only holds the PFN of the first 4K
+	 * page; add the index of the faulting 4K page within the mapping.
+	 *
+	 * The index mask must cover every 4K page of the mapping, i.e.
+	 * pages_per_hpage(level) - 1. Subtracting the page count of the next
+	 * lower level is only right for 2M mappings and drops the low index
+	 * bits for 1G mappings.
+	 */
+	if (level > PG_LEVEL_4K) {
+		unsigned long mask = pages_per_hpage(level) - 1;
+
+		/*
+		 * The base PFN of a large mapping is aligned to its size, so
+		 * any low bits seen here are not PFN bits (on x86 bit 12 of a
+		 * large-page entry is PAT). Clear them before adding the index.
+		 */
+		pfn &= ~(u64)mask;
+		pfn |= (address >> PAGE_SHIFT) & mask;
+	}
+
+	/*
+	 * If it's a guest private page, then the fault cannot be resolved.
+	 * Send a SIGBUS to terminate the process.
+	 *
+	 * NOTE(review): any non-zero return takes this path, which presumably
+	 * also covers a failed lookup - confirm against snp_lookup_rmpentry().
+	 */
+	if (snp_lookup_rmpentry(pfn, &rmp_level)) {
+		do_sigbus(regs, error_code, address, VM_FAULT_SIGBUS);
+		return 1;
+	}
+
+	/*
+	 * The backing page level is higher than the RMP page level, request
+	 * to split the page.
+	 */
+	if (level > rmp_level)
+		return 0;
+
+	return 1;
+}
+
/*
* Handle faults in the user portion of the address space. Nothing in here
* should check X86_PF_USER without a specific justification: for almost
@@ -1306,6 +1365,17 @@ void do_user_addr_fault(struct pt_regs *regs,
if (error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;
+ /*
+ * If it's an RMP violation, try resolving it.
+ */
+ if (error_code & X86_PF_RMP) {
+ if (handle_user_rmp_page_fault(regs, error_code, address))
+ return;
+
+ /* Ask to split the page */
+ flags |= FAULT_FLAG_PAGE_SPLIT;
+ }
+
#ifdef CONFIG_X86_64
/*
* Faults in the vsyscall page might need emulation. The
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..02e5bc1a17ac 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -463,7 +463,8 @@ static inline bool fault_flag_allow_retry_first(enum fault_flag flags)
{ FAULT_FLAG_USER, "USER" }, \
{ FAULT_FLAG_REMOTE, "REMOTE" }, \
{ FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \
- { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }
+ { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \
+ { FAULT_FLAG_PAGE_SPLIT, "PAGESPLIT" }
/*
* vm_fault is filled by the pagefault handler and passed to the vma's
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c29ab4c0cd5c..d653ee873d7d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -829,6 +829,8 @@ typedef struct {
* mapped R/O.
* @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
* We should only access orig_pte if this flag set.
+ * @FAULT_FLAG_PAGE_SPLIT: The fault was due to a page size mismatch; split the
+ * region into a smaller page size and retry.
*
* About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
* whether we would allow page faults to retry by specifying these two
@@ -866,6 +868,7 @@ enum fault_flag {
FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
FAULT_FLAG_UNSHARE = 1 << 10,
FAULT_FLAG_ORIG_PTE_VALID = 1 << 11,
+ FAULT_FLAG_PAGE_SPLIT = 1 << 12,
};
typedef unsigned int __bitwise zap_flags_t;
diff --git a/mm/memory.c b/mm/memory.c
index 7a089145cad4..657d07be7c5b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4944,6 +4944,15 @@ unlock:
return 0;
}
+/*
+ * Handle a fault that was raised with FAULT_FLAG_PAGE_SPLIT by asking for the
+ * huge PMD covering vmf->address to be split.
+ *
+ * Returns VM_FAULT_SIGBUS when CONFIG_AMD_MEM_ENCRYPT is not enabled, and 0
+ * after the split has been requested.
+ *
+ * The return type is vm_fault_t rather than int because the value is a
+ * VM_FAULT_* code that is returned directly from the fault path.
+ */
+static vm_fault_t handle_split_page_fault(struct vm_fault *vmf)
+{
+	if (!IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+		return VM_FAULT_SIGBUS;
+
+	__split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
+	return 0;
+}
+
/*
* By the time we get here, we already hold the mm semaphore
*
@@ -5023,6 +5032,10 @@ retry_pud:
pmd_migration_entry_wait(mm, vmf.pmd);
return 0;
}
+
+ if (flags & FAULT_FLAG_PAGE_SPLIT)
+ return handle_split_page_fault(&vmf);
+
if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&vmf);