cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

fault.c (14762B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *  linux/arch/arm/mm/fault.c
      4 *
      5 *  Copyright (C) 1995  Linus Torvalds
      6 *  Modifications for ARM processor (c) 1995-2004 Russell King
      7 */
      8#include <linux/extable.h>
      9#include <linux/signal.h>
     10#include <linux/mm.h>
     11#include <linux/hardirq.h>
     12#include <linux/init.h>
     13#include <linux/kprobes.h>
     14#include <linux/uaccess.h>
     15#include <linux/page-flags.h>
     16#include <linux/sched/signal.h>
     17#include <linux/sched/debug.h>
     18#include <linux/highmem.h>
     19#include <linux/perf_event.h>
     20#include <linux/kfence.h>
     21
     22#include <asm/system_misc.h>
     23#include <asm/system_info.h>
     24#include <asm/tlbflush.h>
     25
     26#include "fault.h"
     27
     28#ifdef CONFIG_MMU
     29
     30/*
     31 * This is useful to dump out the page tables associated with
     32 * 'addr' in mm 'mm'.
     33 */
     34void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
     35{
     36	pgd_t *pgd;
     37
     38	if (!mm)
     39		mm = &init_mm;
     40
     41	pgd = pgd_offset(mm, addr);
     42	printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
     43
     44	do {
     45		p4d_t *p4d;
     46		pud_t *pud;
     47		pmd_t *pmd;
     48		pte_t *pte;
     49
     50		p4d = p4d_offset(pgd, addr);
     51		if (p4d_none(*p4d))
     52			break;
     53
     54		if (p4d_bad(*p4d)) {
     55			pr_cont("(bad)");
     56			break;
     57		}
     58
     59		pud = pud_offset(p4d, addr);
     60		if (PTRS_PER_PUD != 1)
     61			pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
     62
     63		if (pud_none(*pud))
     64			break;
     65
     66		if (pud_bad(*pud)) {
     67			pr_cont("(bad)");
     68			break;
     69		}
     70
     71		pmd = pmd_offset(pud, addr);
     72		if (PTRS_PER_PMD != 1)
     73			pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
     74
     75		if (pmd_none(*pmd))
     76			break;
     77
     78		if (pmd_bad(*pmd)) {
     79			pr_cont("(bad)");
     80			break;
     81		}
     82
     83		/* We must not map this if we have highmem enabled */
     84		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
     85			break;
     86
     87		pte = pte_offset_map(pmd, addr);
     88		pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
     89#ifndef CONFIG_ARM_LPAE
     90		pr_cont(", *ppte=%08llx",
     91		       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
     92#endif
     93		pte_unmap(pte);
     94	} while (0);
     95
     96	pr_cont("\n");
     97}
     98#else					/* CONFIG_MMU */
     99void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
    100{ }
    101#endif					/* CONFIG_MMU */
    102
    103static inline bool is_write_fault(unsigned int fsr)
    104{
    105	return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
    106}
    107
    108static void die_kernel_fault(const char *msg, struct mm_struct *mm,
    109			     unsigned long addr, unsigned int fsr,
    110			     struct pt_regs *regs)
    111{
    112	bust_spinlocks(1);
    113	pr_alert("8<--- cut here ---\n");
    114	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
    115		 msg, addr);
    116
    117	show_pte(KERN_ALERT, mm, addr);
    118	die("Oops", regs, fsr);
    119	bust_spinlocks(0);
    120	make_task_dead(SIGKILL);
    121}
    122
    123/*
    124 * Oops.  The kernel tried to access some page that wasn't present.
    125 */
    126static void
    127__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
    128		  struct pt_regs *regs)
    129{
    130	const char *msg;
    131	/*
    132	 * Are we prepared to handle this kernel fault?
    133	 */
    134	if (fixup_exception(regs))
    135		return;
    136
    137	/*
    138	 * No handler, we'll have to terminate things with extreme prejudice.
    139	 */
    140	if (addr < PAGE_SIZE) {
    141		msg = "NULL pointer dereference";
    142	} else {
    143		if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
    144			return;
    145
    146		msg = "paging request";
    147	}
    148
    149	die_kernel_fault(msg, mm, addr, fsr, regs);
    150}
    151
    152/*
    153 * Something tried to access memory that isn't in our memory map..
    154 * User mode accesses just cause a SIGSEGV
    155 */
    156static void
    157__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
    158		int code, struct pt_regs *regs)
    159{
    160	struct task_struct *tsk = current;
    161
    162	if (addr > TASK_SIZE)
    163		harden_branch_predictor();
    164
    165#ifdef CONFIG_DEBUG_USER
    166	if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
    167	    ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
    168		pr_err("8<--- cut here ---\n");
    169		pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
    170		       tsk->comm, sig, addr, fsr);
    171		show_pte(KERN_ERR, tsk->mm, addr);
    172		show_regs(regs);
    173	}
    174#endif
    175#ifndef CONFIG_KUSER_HELPERS
    176	if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
    177		printk_ratelimited(KERN_DEBUG
    178				   "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
    179				   tsk->comm, addr);
    180#endif
    181
    182	tsk->thread.address = addr;
    183	tsk->thread.error_code = fsr;
    184	tsk->thread.trap_no = 14;
    185	force_sig_fault(sig, code, (void __user *)addr);
    186}
    187
    188void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    189{
    190	struct task_struct *tsk = current;
    191	struct mm_struct *mm = tsk->active_mm;
    192
    193	/*
    194	 * If we are in kernel mode at this point, we
    195	 * have no context to handle this fault with.
    196	 */
    197	if (user_mode(regs))
    198		__do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
    199	else
    200		__do_kernel_fault(mm, addr, fsr, regs);
    201}
    202
    203#ifdef CONFIG_MMU
    204#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
    205#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
    206
    207static inline bool is_permission_fault(unsigned int fsr)
    208{
    209	int fs = fsr_fs(fsr);
    210#ifdef CONFIG_ARM_LPAE
    211	if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL)
    212		return true;
    213#else
    214	if (fs == FS_L1_PERM || fs == FS_L2_PERM)
    215		return true;
    216#endif
    217	return false;
    218}
    219
    220static vm_fault_t __kprobes
    221__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int flags,
    222		unsigned long vma_flags, struct pt_regs *regs)
    223{
    224	struct vm_area_struct *vma = find_vma(mm, addr);
    225	if (unlikely(!vma))
    226		return VM_FAULT_BADMAP;
    227
    228	if (unlikely(vma->vm_start > addr)) {
    229		if (!(vma->vm_flags & VM_GROWSDOWN))
    230			return VM_FAULT_BADMAP;
    231		if (addr < FIRST_USER_ADDRESS)
    232			return VM_FAULT_BADMAP;
    233		if (expand_stack(vma, addr))
    234			return VM_FAULT_BADMAP;
    235	}
    236
    237	/*
    238	 * ok, we have a good vm_area for this memory access, check the
    239	 * permissions on the VMA allow for the fault which occurred.
    240	 */
    241	if (!(vma->vm_flags & vma_flags))
    242		return VM_FAULT_BADACCESS;
    243
    244	return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
    245}
    246
    247static int __kprobes
    248do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    249{
    250	struct mm_struct *mm = current->mm;
    251	int sig, code;
    252	vm_fault_t fault;
    253	unsigned int flags = FAULT_FLAG_DEFAULT;
    254	unsigned long vm_flags = VM_ACCESS_FLAGS;
    255
    256	if (kprobe_page_fault(regs, fsr))
    257		return 0;
    258
    259
    260	/* Enable interrupts if they were enabled in the parent context. */
    261	if (interrupts_enabled(regs))
    262		local_irq_enable();
    263
    264	/*
    265	 * If we're in an interrupt or have no user
    266	 * context, we must not take the fault..
    267	 */
    268	if (faulthandler_disabled() || !mm)
    269		goto no_context;
    270
    271	if (user_mode(regs))
    272		flags |= FAULT_FLAG_USER;
    273
    274	if (is_write_fault(fsr)) {
    275		flags |= FAULT_FLAG_WRITE;
    276		vm_flags = VM_WRITE;
    277	}
    278
    279	if (fsr & FSR_LNX_PF) {
    280		vm_flags = VM_EXEC;
    281
    282		if (is_permission_fault(fsr) && !user_mode(regs))
    283			die_kernel_fault("execution of memory",
    284					 mm, addr, fsr, regs);
    285	}
    286
    287	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
    288
    289	/*
    290	 * As per x86, we may deadlock here.  However, since the kernel only
    291	 * validly references user space from well-defined areas of the code,
    292	 * we can bug out early if this is from code which shouldn't be doing so.
    293	 */
    294	if (!mmap_read_trylock(mm)) {
    295		if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
    296			goto no_context;
    297retry:
    298		mmap_read_lock(mm);
    299	} else {
    300		/*
    301		 * The above down_read_trylock() might have succeeded,
    302		 * in which case we'll have missed the might_sleep() from
    303		 * down_read().
    304		 */
    305		might_sleep();
    306#ifdef CONFIG_DEBUG_VM
    307		if (!user_mode(regs) &&
    308		    !search_exception_tables(regs->ARM_pc))
    309			goto no_context;
    310#endif
    311	}
    312
    313	fault = __do_page_fault(mm, addr, flags, vm_flags, regs);
    314
    315	/* If we need to retry but a fatal signal is pending, handle the
    316	 * signal first. We do not need to release the mmap_lock because
    317	 * it would already be released in __lock_page_or_retry in
    318	 * mm/filemap.c. */
    319	if (fault_signal_pending(fault, regs)) {
    320		if (!user_mode(regs))
    321			goto no_context;
    322		return 0;
    323	}
    324
    325	if (!(fault & VM_FAULT_ERROR)) {
    326		if (fault & VM_FAULT_RETRY) {
    327			flags |= FAULT_FLAG_TRIED;
    328			goto retry;
    329		}
    330	}
    331
    332	mmap_read_unlock(mm);
    333
    334	/*
    335	 * Handle the "normal" case first - VM_FAULT_MAJOR
    336	 */
    337	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
    338		return 0;
    339
    340	/*
    341	 * If we are in kernel mode at this point, we
    342	 * have no context to handle this fault with.
    343	 */
    344	if (!user_mode(regs))
    345		goto no_context;
    346
    347	if (fault & VM_FAULT_OOM) {
    348		/*
    349		 * We ran out of memory, call the OOM killer, and return to
    350		 * userspace (which will retry the fault, or kill us if we
    351		 * got oom-killed)
    352		 */
    353		pagefault_out_of_memory();
    354		return 0;
    355	}
    356
    357	if (fault & VM_FAULT_SIGBUS) {
    358		/*
    359		 * We had some memory, but were unable to
    360		 * successfully fix up this page fault.
    361		 */
    362		sig = SIGBUS;
    363		code = BUS_ADRERR;
    364	} else {
    365		/*
    366		 * Something tried to access memory that
    367		 * isn't in our memory map..
    368		 */
    369		sig = SIGSEGV;
    370		code = fault == VM_FAULT_BADACCESS ?
    371			SEGV_ACCERR : SEGV_MAPERR;
    372	}
    373
    374	__do_user_fault(addr, fsr, sig, code, regs);
    375	return 0;
    376
    377no_context:
    378	__do_kernel_fault(mm, addr, fsr, regs);
    379	return 0;
    380}
    381#else					/* CONFIG_MMU */
    382static int
    383do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    384{
    385	return 0;
    386}
    387#endif					/* CONFIG_MMU */
    388
    389/*
    390 * First Level Translation Fault Handler
    391 *
    392 * We enter here because the first level page table doesn't contain
    393 * a valid entry for the address.
    394 *
    395 * If the address is in kernel space (>= TASK_SIZE), then we are
    396 * probably faulting in the vmalloc() area.
    397 *
    398 * If the init_task's first level page table contains the relevant
    399 * entry, we copy it to this task.  If not, we send the process
    400 * a signal, fixup the exception, or oops the kernel.
    401 *
    402 * NOTE! We MUST NOT take any locks for this case. We may be in an
    403 * interrupt or a critical region, and should only copy the information
    404 * from the master page table, nothing more.
    405 */
    406#ifdef CONFIG_MMU
    407static int __kprobes
    408do_translation_fault(unsigned long addr, unsigned int fsr,
    409		     struct pt_regs *regs)
    410{
    411	unsigned int index;
    412	pgd_t *pgd, *pgd_k;
    413	p4d_t *p4d, *p4d_k;
    414	pud_t *pud, *pud_k;
    415	pmd_t *pmd, *pmd_k;
    416
    417	if (addr < TASK_SIZE)
    418		return do_page_fault(addr, fsr, regs);
    419
    420	if (user_mode(regs))
    421		goto bad_area;
    422
    423	index = pgd_index(addr);
    424
    425	pgd = cpu_get_pgd() + index;
    426	pgd_k = init_mm.pgd + index;
    427
    428	p4d = p4d_offset(pgd, addr);
    429	p4d_k = p4d_offset(pgd_k, addr);
    430
    431	if (p4d_none(*p4d_k))
    432		goto bad_area;
    433	if (!p4d_present(*p4d))
    434		set_p4d(p4d, *p4d_k);
    435
    436	pud = pud_offset(p4d, addr);
    437	pud_k = pud_offset(p4d_k, addr);
    438
    439	if (pud_none(*pud_k))
    440		goto bad_area;
    441	if (!pud_present(*pud))
    442		set_pud(pud, *pud_k);
    443
    444	pmd = pmd_offset(pud, addr);
    445	pmd_k = pmd_offset(pud_k, addr);
    446
    447#ifdef CONFIG_ARM_LPAE
    448	/*
    449	 * Only one hardware entry per PMD with LPAE.
    450	 */
    451	index = 0;
    452#else
    453	/*
    454	 * On ARM one Linux PGD entry contains two hardware entries (see the
    455	 * page table layout in pgtable.h). We normally guarantee that we always
    456	 * fill both L1 entries. But create_mapping() doesn't follow the rule.
    457	 * It can create individual L1 entries, so here we have to perform the
    458	 * pmd_none() check on the entry that really corresponds to the address,
    459	 * not on the first of the pair.
    460	 */
    461	index = (addr >> SECTION_SHIFT) & 1;
    462#endif
    463	if (pmd_none(pmd_k[index]))
    464		goto bad_area;
    465
    466	copy_pmd(pmd, pmd_k);
    467	return 0;
    468
    469bad_area:
    470	do_bad_area(addr, fsr, regs);
    471	return 0;
    472}
    473#else					/* CONFIG_MMU */
    474static int
    475do_translation_fault(unsigned long addr, unsigned int fsr,
    476		     struct pt_regs *regs)
    477{
    478	return 0;
    479}
    480#endif					/* CONFIG_MMU */
    481
    482/*
    483 * Some section permission faults need to be handled gracefully.
    484 * They can happen due to a __{get,put}_user during an oops.
    485 */
    486#ifndef CONFIG_ARM_LPAE
    487static int
    488do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    489{
    490	do_bad_area(addr, fsr, regs);
    491	return 0;
    492}
    493#endif /* CONFIG_ARM_LPAE */
    494
    495/*
    496 * This abort handler always returns "fault".
    497 */
    498static int
    499do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    500{
    501	return 1;
    502}
    503
    504struct fsr_info {
    505	int	(*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
    506	int	sig;
    507	int	code;
    508	const char *name;
    509};
    510
    511/* FSR definition */
    512#ifdef CONFIG_ARM_LPAE
    513#include "fsr-3level.c"
    514#else
    515#include "fsr-2level.c"
    516#endif
    517
    518void __init
    519hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
    520		int sig, int code, const char *name)
    521{
    522	if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
    523		BUG();
    524
    525	fsr_info[nr].fn   = fn;
    526	fsr_info[nr].sig  = sig;
    527	fsr_info[nr].code = code;
    528	fsr_info[nr].name = name;
    529}
    530
    531/*
    532 * Dispatch a data abort to the relevant handler.
    533 */
    534asmlinkage void
    535do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    536{
    537	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
    538
    539	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
    540		return;
    541
    542	pr_alert("8<--- cut here ---\n");
    543	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
    544		inf->name, fsr, addr);
    545	show_pte(KERN_ALERT, current->mm, addr);
    546
    547	arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
    548		       fsr, 0);
    549}
    550
    551void __init
    552hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
    553		 int sig, int code, const char *name)
    554{
    555	if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
    556		BUG();
    557
    558	ifsr_info[nr].fn   = fn;
    559	ifsr_info[nr].sig  = sig;
    560	ifsr_info[nr].code = code;
    561	ifsr_info[nr].name = name;
    562}
    563
    564asmlinkage void
    565do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
    566{
    567	const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
    568
    569	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
    570		return;
    571
    572	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
    573		inf->name, ifsr, addr);
    574
    575	arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
    576		       ifsr, 0);
    577}
    578
    579/*
    580 * Abort handler to be used only during first unmasking of asynchronous aborts
    581 * on the boot CPU. This makes sure that the machine will not die if the
    582 * firmware/bootloader left an imprecise abort pending for us to trip over.
    583 */
    584static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
    585				      struct pt_regs *regs)
    586{
    587	pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
    588		"first unmask, this is most likely caused by a "
    589		"firmware/bootloader bug.\n", fsr);
    590
    591	return 0;
    592}
    593
    594void __init early_abt_enable(void)
    595{
    596	fsr_info[FSR_FS_AEA].fn = early_abort_handler;
    597	local_abt_enable();
    598	fsr_info[FSR_FS_AEA].fn = do_bad;
    599}
    600
    601#ifndef CONFIG_ARM_LPAE
    602static int __init exceptions_init(void)
    603{
    604	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
    605		hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
    606				"I-cache maintenance fault");
    607	}
    608
    609	if (cpu_architecture() >= CPU_ARCH_ARMv7) {
    610		/*
    611		 * TODO: Access flag faults introduced in ARMv6K.
    612		 * Runtime check for 'K' extension is needed
    613		 */
    614		hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
    615				"section access flag fault");
    616		hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
    617				"section access flag fault");
    618	}
    619
    620	return 0;
    621}
    622
    623arch_initcall(exceptions_init);
    624#endif