diff options
| -rw-r--r-- | arch/x86/mm/fault.c | 70 | ||||
| -rw-r--r-- | include/linux/mm.h | 3 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 3 | ||||
| -rw-r--r-- | mm/memory.c | 13 |
4 files changed, 88 insertions, 1 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a4c270e99f7f..1c6879b8ee35 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -19,6 +19,7 @@
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/sev.h>			/* snp_lookup_rmpentry()	*/
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -1209,6 +1210,64 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 }
 NOKPROBE_SYMBOL(do_kern_addr_fault);
 
+static inline size_t pages_per_hpage(int level)
+{
+	return page_level_size(level) / PAGE_SIZE;
+}
+
+/*
+ * Return 1 if the fault was dealt with here (retry or SIGBUS), 0 if the
+ * mapping of the address needs to be split in order to resolve the fault.
+ */
+static int handle_user_rmp_page_fault(struct pt_regs *regs, unsigned long error_code,
+				      unsigned long address)
+{
+	int rmp_level, level;
+	pgd_t *pgd;
+	pte_t *pte;
+	u64 pfn;
+
+	pgd = __va(read_cr3_pa());
+	pgd += pgd_index(address);
+
+	pte = lookup_address_in_pgd(pgd, address, &level);
+
+	/*
+	 * It can happen if there was a race between an unmap event and
+	 * the RMP fault delivery.
+	 */
+	if (!pte || !pte_present(*pte))
+		return 1;
+
+	pfn = pte_pfn(*pte);
+
+	/* If it's a large page, add the 4K index of the address within it */
+	if (level > PG_LEVEL_4K) {
+		unsigned long mask;
+
+		mask = pages_per_hpage(level) - 1;
+		pfn |= (address >> PAGE_SHIFT) & mask;
+	}
+
+	/*
+	 * If it's a guest private page, then the fault cannot be resolved.
+	 * Send a SIGBUS to terminate the process.
+	 */
+	if (snp_lookup_rmpentry(pfn, &rmp_level)) {
+		do_sigbus(regs, error_code, address, VM_FAULT_SIGBUS);
+		return 1;
+	}
+
+	/*
+	 * The backing page level is higher than the RMP page level, request
+	 * to split the page.
+	 */
+	if (level > rmp_level)
+		return 0;
+
+	return 1;
+}
+
 /*
  * Handle faults in the user portion of the address space.
Nothing in here
  * should check X86_PF_USER without a specific justification: for almost
@@ -1306,6 +1365,17 @@ void do_user_addr_fault(struct pt_regs *regs,
 	if (error_code & X86_PF_INSTR)
 		flags |= FAULT_FLAG_INSTRUCTION;
 
+	/*
+	 * If it's an RMP violation, try resolving it.
+	 */
+	if (error_code & X86_PF_RMP) {
+		if (handle_user_rmp_page_fault(regs, error_code, address))
+			return;
+
+		/* Ask to split the page */
+		flags |= FAULT_FLAG_PAGE_SPLIT;
+	}
+
 #ifdef CONFIG_X86_64
 	/*
 	 * Faults in the vsyscall page might need emulation.  The
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..02e5bc1a17ac 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -463,7 +463,8 @@ static inline bool fault_flag_allow_retry_first(enum fault_flag flags)
 	{ FAULT_FLAG_USER,		"USER" }, \
 	{ FAULT_FLAG_REMOTE,		"REMOTE" }, \
 	{ FAULT_FLAG_INSTRUCTION,	"INSTRUCTION" }, \
-	{ FAULT_FLAG_INTERRUPTIBLE,	"INTERRUPTIBLE" }
+	{ FAULT_FLAG_INTERRUPTIBLE,	"INTERRUPTIBLE" }, \
+	{ FAULT_FLAG_PAGE_SPLIT,	"PAGESPLIT" }
 
 /*
  * vm_fault is filled by the pagefault handler and passed to the vma's
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c29ab4c0cd5c..d653ee873d7d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -829,6 +829,8 @@ typedef struct {
  *				mapped R/O.
  * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
  *                        We should only access orig_pte if this flag set.
+ * @FAULT_FLAG_PAGE_SPLIT: The fault was due to a page size mismatch, split the
+ *			   region to smaller page size and retry.
  *
  * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
  * whether we would allow page faults to retry by specifying these two
@@ -866,6 +868,7 @@ enum fault_flag {
 	FAULT_FLAG_INTERRUPTIBLE =	1 << 9,
 	FAULT_FLAG_UNSHARE =		1 << 10,
 	FAULT_FLAG_ORIG_PTE_VALID =	1 << 11,
+	FAULT_FLAG_PAGE_SPLIT =		1 << 12,
 };
 
 typedef unsigned int __bitwise zap_flags_t;
diff --git a/mm/memory.c b/mm/memory.c
index 7a089145cad4..657d07be7c5b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4944,6 +4944,15 @@ unlock:
 	return 0;
 }
 
+static vm_fault_t handle_split_page_fault(struct vm_fault *vmf)
+{
+	if (!IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+		return VM_FAULT_SIGBUS;
+	/* Split the huge PMD so the retried access hits a smaller mapping. */
+	__split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
+	return 0;
+}
+
 /*
  * By the time we get here, we already hold the mm semaphore
  *
@@ -5023,6 +5032,10 @@ retry_pud:
 			pmd_migration_entry_wait(mm, vmf.pmd);
 			return 0;
 		}
+
+		if (flags & FAULT_FLAG_PAGE_SPLIT)
+			return handle_split_page_fault(&vmf);
+
 		if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
 			if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
 				return do_huge_pmd_numa_page(&vmf);
