fault.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
fault.c (7787B)
/*
 *  arch/microblaze/mm/fault.c
 *
 *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
 *
 *  Derived from "arch/ppc/mm/fault.c"
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Modified by Cort Dougan and Paul Mackerras.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file COPYING in the main directory of this
 * archive for more details.
 *
 */
     19
     20#include <linux/extable.h>
     21#include <linux/signal.h>
     22#include <linux/sched.h>
     23#include <linux/kernel.h>
     24#include <linux/errno.h>
     25#include <linux/string.h>
     26#include <linux/types.h>
     27#include <linux/ptrace.h>
     28#include <linux/mman.h>
     29#include <linux/mm.h>
     30#include <linux/interrupt.h>
     31#include <linux/perf_event.h>
     32
     33#include <asm/page.h>
     34#include <asm/mmu.h>
     35#include <linux/mmu_context.h>
     36#include <linux/uaccess.h>
     37#include <asm/exceptions.h>
     38
     39static unsigned long pte_misses;	/* updated by do_page_fault() */
     40static unsigned long pte_errors;	/* updated by do_page_fault() */
     41
     42/*
     43 * Check whether the instruction at regs->pc is a store using
     44 * an update addressing form which will update r1.
     45 */
     46static int store_updates_sp(struct pt_regs *regs)
     47{
     48	unsigned int inst;
     49
     50	if (get_user(inst, (unsigned int __user *)regs->pc))
     51		return 0;
     52	/* check for 1 in the rD field */
     53	if (((inst >> 21) & 0x1f) != 1)
     54		return 0;
     55	/* check for store opcodes */
     56	if ((inst & 0xd0000000) == 0xd0000000)
     57		return 1;
     58	return 0;
     59}
     60
     61
     62/*
     63 * bad_page_fault is called when we have a bad access from the kernel.
     64 * It is called from do_page_fault above and from some of the procedures
     65 * in traps.c.
     66 */
     67void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
     68{
     69	const struct exception_table_entry *fixup;
     70/* MS: no context */
     71	/* Are we prepared to handle this fault?  */
     72	fixup = search_exception_tables(regs->pc);
     73	if (fixup) {
     74		regs->pc = fixup->fixup;
     75		return;
     76	}
     77
     78	/* kernel has accessed a bad area */
     79	die("kernel access of bad area", regs, sig);
     80}
     81
     82/*
     83 * The error_code parameter is ESR for a data fault,
     84 * 0 for an instruction fault.
     85 */
     86void do_page_fault(struct pt_regs *regs, unsigned long address,
     87		   unsigned long error_code)
     88{
     89	struct vm_area_struct *vma;
     90	struct mm_struct *mm = current->mm;
     91	int code = SEGV_MAPERR;
     92	int is_write = error_code & ESR_S;
     93	vm_fault_t fault;
     94	unsigned int flags = FAULT_FLAG_DEFAULT;
     95
     96	regs->ear = address;
     97	regs->esr = error_code;
     98
     99	/* On a kernel SLB miss we can only check for a valid exception entry */
    100	if (unlikely(kernel_mode(regs) && (address >= TASK_SIZE))) {
    101		pr_warn("kernel task_size exceed");
    102		_exception(SIGSEGV, regs, code, address);
    103	}
    104
    105	/* for instr TLB miss and instr storage exception ESR_S is undefined */
    106	if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
    107		is_write = 0;
    108
    109	if (unlikely(faulthandler_disabled() || !mm)) {
    110		if (kernel_mode(regs))
    111			goto bad_area_nosemaphore;
    112
    113		/* faulthandler_disabled() in user mode is really bad,
    114		   as is current->mm == NULL. */
    115		pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
    116			 mm);
    117		pr_emerg("r15 = %lx  MSR = %lx\n",
    118		       regs->r15, regs->msr);
    119		die("Weird page fault", regs, SIGSEGV);
    120	}
    121
    122	if (user_mode(regs))
    123		flags |= FAULT_FLAG_USER;
    124
    125	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
    126
    127	/* When running in the kernel we expect faults to occur only to
    128	 * addresses in user space.  All other faults represent errors in the
    129	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
    130	 * erroneous fault occurring in a code path which already holds mmap_lock
    131	 * we will deadlock attempting to validate the fault against the
    132	 * address space.  Luckily the kernel only validly references user
    133	 * space from well defined areas of code, which are listed in the
    134	 * exceptions table.
    135	 *
    136	 * As the vast majority of faults will be valid we will only perform
    137	 * the source reference check when there is a possibility of a deadlock.
    138	 * Attempt to lock the address space, if we cannot we then validate the
    139	 * source.  If this is invalid we can skip the address space check,
    140	 * thus avoiding the deadlock.
    141	 */
    142	if (unlikely(!mmap_read_trylock(mm))) {
    143		if (kernel_mode(regs) && !search_exception_tables(regs->pc))
    144			goto bad_area_nosemaphore;
    145
    146retry:
    147		mmap_read_lock(mm);
    148	}
    149
    150	vma = find_vma(mm, address);
    151	if (unlikely(!vma))
    152		goto bad_area;
    153
    154	if (vma->vm_start <= address)
    155		goto good_area;
    156
    157	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
    158		goto bad_area;
    159
    160	if (unlikely(!is_write))
    161		goto bad_area;
    162
    163	/*
    164	 * N.B. The ABI allows programs to access up to
    165	 * a few hundred bytes below the stack pointer (TBD).
    166	 * The kernel signal delivery code writes up to about 1.5kB
    167	 * below the stack pointer (r1) before decrementing it.
    168	 * The exec code can write slightly over 640kB to the stack
    169	 * before setting the user r1.  Thus we allow the stack to
    170	 * expand to 1MB without further checks.
    171	 */
    172	if (unlikely(address + 0x100000 < vma->vm_end)) {
    173
    174		/* get user regs even if this fault is in kernel mode */
    175		struct pt_regs *uregs = current->thread.regs;
    176		if (uregs == NULL)
    177			goto bad_area;
    178
    179		/*
    180		 * A user-mode access to an address a long way below
    181		 * the stack pointer is only valid if the instruction
    182		 * is one which would update the stack pointer to the
    183		 * address accessed if the instruction completed,
    184		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
    185		 * (or the byte, halfword, float or double forms).
    186		 *
    187		 * If we don't check this then any write to the area
    188		 * between the last mapped region and the stack will
    189		 * expand the stack rather than segfaulting.
    190		 */
    191		if (address + 2048 < uregs->r1
    192			&& (kernel_mode(regs) || !store_updates_sp(regs)))
    193				goto bad_area;
    194	}
    195	if (expand_stack(vma, address))
    196		goto bad_area;
    197
    198good_area:
    199	code = SEGV_ACCERR;
    200
    201	/* a write */
    202	if (unlikely(is_write)) {
    203		if (unlikely(!(vma->vm_flags & VM_WRITE)))
    204			goto bad_area;
    205		flags |= FAULT_FLAG_WRITE;
    206	/* a read */
    207	} else {
    208		/* protection fault */
    209		if (unlikely(error_code & 0x08000000))
    210			goto bad_area;
    211		if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC))))
    212			goto bad_area;
    213	}
    214
    215	/*
    216	 * If for any reason at all we couldn't handle the fault,
    217	 * make sure we exit gracefully rather than endlessly redo
    218	 * the fault.
    219	 */
    220	fault = handle_mm_fault(vma, address, flags, regs);
    221
    222	if (fault_signal_pending(fault, regs))
    223		return;
    224
    225	if (unlikely(fault & VM_FAULT_ERROR)) {
    226		if (fault & VM_FAULT_OOM)
    227			goto out_of_memory;
    228		else if (fault & VM_FAULT_SIGSEGV)
    229			goto bad_area;
    230		else if (fault & VM_FAULT_SIGBUS)
    231			goto do_sigbus;
    232		BUG();
    233	}
    234
    235	if (fault & VM_FAULT_RETRY) {
    236		flags |= FAULT_FLAG_TRIED;
    237
    238		/*
    239		 * No need to mmap_read_unlock(mm) as we would
    240		 * have already released it in __lock_page_or_retry
    241		 * in mm/filemap.c.
    242		 */
    243
    244		goto retry;
    245	}
    246
    247	mmap_read_unlock(mm);
    248
    249	/*
    250	 * keep track of tlb+htab misses that are good addrs but
    251	 * just need pte's created via handle_mm_fault()
    252	 * -- Cort
    253	 */
    254	pte_misses++;
    255	return;
    256
    257bad_area:
    258	mmap_read_unlock(mm);
    259
    260bad_area_nosemaphore:
    261	pte_errors++;
    262
    263	/* User mode accesses cause a SIGSEGV */
    264	if (user_mode(regs)) {
    265		_exception(SIGSEGV, regs, code, address);
    266		return;
    267	}
    268
    269	bad_page_fault(regs, address, SIGSEGV);
    270	return;
    271
    272/*
    273 * We ran out of memory, or some other thing happened to us that made
    274 * us unable to handle the page fault gracefully.
    275 */
    276out_of_memory:
    277	mmap_read_unlock(mm);
    278	if (!user_mode(regs))
    279		bad_page_fault(regs, address, SIGKILL);
    280	else
    281		pagefault_out_of_memory();
    282	return;
    283
    284do_sigbus:
    285	mmap_read_unlock(mm);
    286	if (user_mode(regs)) {
    287		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
    288		return;
    289	}
    290	bad_page_fault(regs, address, SIGBUS);
    291}