cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

fault.c (11085B)


/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2012  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

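/*
 * Deliver a fault signal (SIGSEGV or SIGBUS) carrying the faulting
 * address to the current task.
 */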
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address)
{
	force_sig_fault(si_signo, si_code, (void __user *)address);
}

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
static void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (mm) {
		pgd = mm->pgd;
	} else {
		pgd = get_TTB();

		if (unlikely(!pgd))
			pgd = swapper_pg_dir;
	}

	pr_alert("pgd = %p\n", pgd);
	pgd += pgd_index(addr);
	pr_alert("[%08lx] *pgd=%0*llx", addr, (u32)(sizeof(*pgd) * 2),
		 (u64)pgd_val(*pgd));

	do {
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			pr_cont("(bad)");
			break;
		}

		p4d = p4d_offset(pgd, addr);
		if (PTRS_PER_P4D != 1)
			pr_cont(", *p4d=%0*Lx", (u32)(sizeof(*p4d) * 2),
				(u64)p4d_val(*p4d));

		if (p4d_none(*p4d))
			break;

		if (p4d_bad(*p4d)) {
			pr_cont("(bad)");
			break;
		}

		pud = pud_offset(p4d, addr);
		if (PTRS_PER_PUD != 1)
			pr_cont(", *pud=%0*llx", (u32)(sizeof(*pud) * 2),
				(u64)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			pr_cont("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			pr_cont(", *pmd=%0*llx", (u32)(sizeof(*pmd) * 2),
				(u64)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			pr_cont("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_kernel(pmd, addr);
		pr_cont(", *pte=%0*llx", (u32)(sizeof(*pte) * 2),
			(u64)pte_val(*pte));
	} while (0);

	pr_cont("\n");
}

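/*
 * Copy the kernel mapping for 'address' from the reference page table
 * (init_mm.pgd) into 'pgd'. Returns the reference pmd on success, or
 * NULL if the reference entry is missing or the tables are already in
 * sync, in which case the fault must have another cause.
 */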
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	p4d_t *p4d, *p4d_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	p4d = p4d_offset(pgd, address);
	p4d_k = p4d_offset(pgd_k, address);
	if (!p4d_present(*p4d_k))
		return NULL;

	pud = pud_offset(p4d, address);
	pud_k = pud_offset(p4d_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
		return NULL;
	}

	return pmd_k;
}

#ifdef CONFIG_SH_STORE_QUEUES
#define __FAULT_ADDR_LIMIT	P3_ADDR_MAX
#else
#define __FAULT_ADDR_LIMIT	VMALLOC_END
#endif

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc/module/P3 area: */
	if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_k = get_TTB();
	pmd_k = vmalloc_sync_one(pgd_k, address);
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}

static void
show_fault_oops(struct pt_regs *regs, unsigned long address)
{
	if (!oops_may_print())
		return;

	pr_alert("BUG: unable to handle kernel %s at %08lx\n",
		 address < PAGE_SIZE ? "NULL pointer dereference"
				     : "paging request",
		 address);
	pr_alert("PC:");
	printk_address(regs->pc, 1);

	show_pte(NULL, address);
}

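/*
 * A fault taken in kernel context that cannot be handed back to user
 * space: try exception fixups and trapped I/O emulation first, then
 * oops and die.
 */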
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address)
{
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return;

	if (handle_trapped_io(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	show_fault_oops(regs, address);

	die("Oops", regs, error_code);
}

static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		       unsigned long address, int si_code)
{
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		/*
		 * It's possible to have interrupts off here:
		 */
		local_irq_enable();

		force_sig_info_fault(SIGSEGV, si_code, address);

		return;
	}

	no_context(regs, error_code, address);
}

static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		     unsigned long address)
{
	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}

static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
	mmap_read_unlock(mm);

	__bad_area_nosemaphore(regs, error_code, address, si_code);
}

static noinline void
bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_MAPERR);
}

static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
		      unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_ACCERR);
}

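/*
 * Report a VM_FAULT_SIGBUS result: drop the mmap lock, handle
 * kernel-mode faults via no_context(), and send SIGBUS to the task.
 */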
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;

	mmap_read_unlock(mm);

	/* Kernel mode? Handle exceptions or die: */
	if (!user_mode(regs))
		no_context(regs, error_code, address);

	force_sig_info_fault(SIGBUS, BUS_ADRERR, address);
}

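/*
 * Handle error or retry results from handle_mm_fault(): fatal pending
 * signals, OOM, SIGBUS and SIGSEGV. Returns 1 if the fault has been
 * dealt with completely, 0 if the caller should carry on (e.g. to
 * retry after VM_FAULT_RETRY).
 */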
static noinline int
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
	       unsigned long address, vm_fault_t fault)
{
	/*
	 * Pagefault was interrupted by SIGKILL. We have no reason to
	 * continue pagefault.
	 */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			no_context(regs, error_code, address);
		return 1;
	}

	/* Release mmap_lock first if necessary */
	if (!(fault & VM_FAULT_RETRY))
		mmap_read_unlock(current->mm);

	if (!(fault & VM_FAULT_ERROR))
		return 0;

	if (fault & VM_FAULT_OOM) {
		/* Kernel mode? Handle exceptions or die: */
		if (!user_mode(regs)) {
			no_context(regs, error_code, address);
			return 1;
		}

		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we got
		 * oom-killed):
		 */
		pagefault_out_of_memory();
	} else {
		if (fault & VM_FAULT_SIGBUS)
			do_sigbus(regs, error_code, address);
		else if (fault & VM_FAULT_SIGSEGV)
			bad_area(regs, error_code, address);
		else
			BUG();
	}

	return 1;
}

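/*
 * Check the attempted access against the vma permissions: writes need
 * VM_WRITE, instruction fetches (ITLB misses) need VM_EXEC, and reads
 * require the vma to be accessible at all.
 */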
static inline int access_error(int error_code, struct vm_area_struct *vma)
{
	if (error_code & FAULT_CODE_WRITE) {
		/* write, present and write, not present: */
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return 1;
		return 0;
	}

	/* ITLB miss on NX page */
	if (unlikely((error_code & FAULT_CODE_ITLB) &&
		     !(vma->vm_flags & VM_EXEC)))
		return 1;

	/* read, not present: */
	if (unlikely(!vma_is_accessible(vma)))
		return 1;

	return 0;
}

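/* Everything at or above TASK_SIZE is outside the user address space. */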
static int fault_in_kernel_space(unsigned long address)
{
	return address >= TASK_SIZE;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code,
					unsigned long address)
{
	unsigned long vec;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	vm_fault_t fault;
	unsigned int flags = FAULT_FLAG_DEFAULT;

	tsk = current;
	mm = tsk->mm;
	vec = lookup_exception_vector();

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
			return;
		if (kprobe_page_fault(regs, vec))
			return;

		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

	if (unlikely(kprobe_page_fault(regs, vec)))
		return;

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK)
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * If we're in an interrupt, have no user context or are running
	 * with pagefaults disabled then we must not take the fault:
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

retry:
	mmap_read_lock(mm);

	vma = find_vma(mm, address);
	if (unlikely(!vma)) {
		bad_area(regs, error_code, address);
		return;
	}
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, error_code, address);
		return;
	}
	if (unlikely(expand_stack(vma, address))) {
		bad_area(regs, error_code, address);
		return;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	if (unlikely(access_error(error_code, vma))) {
		bad_area_access_error(regs, error_code, address);
		return;
	}

	set_thread_fault_code(error_code);

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (error_code & FAULT_CODE_WRITE)
		flags |= FAULT_FLAG_WRITE;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags, regs);

	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
		if (mm_fault_error(regs, error_code, address, fault))
			return;

	if (fault & VM_FAULT_RETRY) {
		flags |= FAULT_FLAG_TRIED;

		/*
		 * No need to mmap_read_unlock(mm) as we would
		 * have already released it in __lock_page_or_retry
		 * in mm/filemap.c.
		 */
		goto retry;
	}

	mmap_read_unlock(mm);
}