cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

fault_64.c (13559B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc.
      4 *
      5 * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
      6 * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
      7 */
      8
      9#include <asm/head.h>
     10
     11#include <linux/string.h>
     12#include <linux/types.h>
     13#include <linux/sched.h>
     14#include <linux/sched/debug.h>
     15#include <linux/ptrace.h>
     16#include <linux/mman.h>
     17#include <linux/signal.h>
     18#include <linux/mm.h>
     19#include <linux/extable.h>
     20#include <linux/init.h>
     21#include <linux/perf_event.h>
     22#include <linux/interrupt.h>
     23#include <linux/kprobes.h>
     24#include <linux/kdebug.h>
     25#include <linux/percpu.h>
     26#include <linux/context_tracking.h>
     27#include <linux/uaccess.h>
     28
     29#include <asm/page.h>
     30#include <asm/openprom.h>
     31#include <asm/oplib.h>
     32#include <asm/asi.h>
     33#include <asm/lsu.h>
     34#include <asm/sections.h>
     35#include <asm/mmu_context.h>
     36#include <asm/setup.h>
     37
     /*
      * When non-zero, do_fault_siginfo() calls show_signal_msg() to log the
      * fault before the signal is delivered.  Not static, so it is visible
      * outside this file.
      */
     38int show_unhandled_signals = 1;
     39
     40static void __kprobes unhandled_fault(unsigned long address,
     41				      struct task_struct *tsk,
     42				      struct pt_regs *regs)
     43{
     44	if ((unsigned long) address < PAGE_SIZE) {
     45		printk(KERN_ALERT "Unable to handle kernel NULL "
     46		       "pointer dereference\n");
     47	} else {
     48		printk(KERN_ALERT "Unable to handle kernel paging request "
     49		       "at virtual address %016lx\n", (unsigned long)address);
     50	}
     51	printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n",
     52	       (tsk->mm ?
     53		CTX_HWBITS(tsk->mm->context) :
     54		CTX_HWBITS(tsk->active_mm->context)));
     55	printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n",
     56	       (tsk->mm ? (unsigned long) tsk->mm->pgd :
     57		          (unsigned long) tsk->active_mm->pgd));
     58	die_if_kernel("Oops", regs);
     59}
     60
     61static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
     62{
     63	printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
     64	       regs->tpc);
     65	printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]);
     66	printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]);
     67	printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
     68	dump_stack();
     69	unhandled_fault(regs->tpc, current, regs);
     70}
     71
     72/*
     73 * We now make sure that mmap_lock is held in all paths that call
     74 * this. Additionally, to prevent kswapd from ripping ptes from
     75 * under us, raise interrupts around the time that we look at the
     76 * pte, kswapd will have to wait to get his smp ipi response from
     77 * us. vmtruncate likewise. This saves us having to get pte lock.
     78 */
     79static unsigned int get_user_insn(unsigned long tpc)
     80{
     81	pgd_t *pgdp = pgd_offset(current->mm, tpc);
     82	p4d_t *p4dp;
     83	pud_t *pudp;
     84	pmd_t *pmdp;
     85	pte_t *ptep, pte;
     86	unsigned long pa;
     87	u32 insn = 0;
     88
     89	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
     90		goto out;
     91	p4dp = p4d_offset(pgdp, tpc);
     92	if (p4d_none(*p4dp) || unlikely(p4d_bad(*p4dp)))
     93		goto out;
     94	pudp = pud_offset(p4dp, tpc);
     95	if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
     96		goto out;
     97
     98	/* This disables preemption for us as well. */
     99	local_irq_disable();
    100
    101	pmdp = pmd_offset(pudp, tpc);
    102	if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
    103		goto out_irq_enable;
    104
    105#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
    106	if (is_hugetlb_pmd(*pmdp)) {
    107		pa  = pmd_pfn(*pmdp) << PAGE_SHIFT;
    108		pa += tpc & ~HPAGE_MASK;
    109
    110		/* Use phys bypass so we don't pollute dtlb/dcache. */
    111		__asm__ __volatile__("lduwa [%1] %2, %0"
    112				     : "=r" (insn)
    113				     : "r" (pa), "i" (ASI_PHYS_USE_EC));
    114	} else
    115#endif
    116	{
    117		ptep = pte_offset_map(pmdp, tpc);
    118		pte = *ptep;
    119		if (pte_present(pte)) {
    120			pa  = (pte_pfn(pte) << PAGE_SHIFT);
    121			pa += (tpc & ~PAGE_MASK);
    122
    123			/* Use phys bypass so we don't pollute dtlb/dcache. */
    124			__asm__ __volatile__("lduwa [%1] %2, %0"
    125					     : "=r" (insn)
    126					     : "r" (pa), "i" (ASI_PHYS_USE_EC));
    127		}
    128		pte_unmap(ptep);
    129	}
    130out_irq_enable:
    131	local_irq_enable();
    132out:
    133	return insn;
    134}
    135
    136static inline void
    137show_signal_msg(struct pt_regs *regs, int sig, int code,
    138		unsigned long address, struct task_struct *tsk)
    139{
    140	if (!unhandled_signal(tsk, sig))
    141		return;
    142
    143	if (!printk_ratelimit())
    144		return;
    145
    146	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
    147	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
    148	       tsk->comm, task_pid_nr(tsk), address,
    149	       (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
    150	       (void *)regs->u_regs[UREG_FP], code);
    151
    152	print_vma_addr(KERN_CONT " in ", regs->tpc);
    153
    154	printk(KERN_CONT "\n");
    155}
    156
    157static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
    158			     unsigned long fault_addr, unsigned int insn,
    159			     int fault_code)
    160{
    161	unsigned long addr;
    162
    163	if (fault_code & FAULT_CODE_ITLB) {
    164		addr = regs->tpc;
    165	} else {
    166		/* If we were able to probe the faulting instruction, use it
    167		 * to compute a precise fault address.  Otherwise use the fault
    168		 * time provided address which may only have page granularity.
    169		 */
    170		if (insn)
    171			addr = compute_effective_address(regs, insn, 0);
    172		else
    173			addr = fault_addr;
    174	}
    175
    176	if (unlikely(show_unhandled_signals))
    177		show_signal_msg(regs, sig, code, addr, current);
    178
    179	force_sig_fault(sig, code, (void __user *) addr);
    180}
    181
    182static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
    183{
    184	if (!insn) {
    185		if (!regs->tpc || (regs->tpc & 0x3))
    186			return 0;
    187		if (regs->tstate & TSTATE_PRIV) {
    188			insn = *(unsigned int *) regs->tpc;
    189		} else {
    190			insn = get_user_insn(regs->tpc);
    191		}
    192	}
    193	return insn;
    194}
    195
    196static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code,
    197				      int fault_code, unsigned int insn,
    198				      unsigned long address)
    199{
    200	unsigned char asi = ASI_P;
    201 
    202	if ((!insn) && (regs->tstate & TSTATE_PRIV))
    203		goto cannot_handle;
    204
    205	/* If user insn could be read (thus insn is zero), that
    206	 * is fine.  We will just gun down the process with a signal
    207	 * in that case.
    208	 */
    209
    210	if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) &&
    211	    (insn & 0xc0800000) == 0xc0800000) {
    212		if (insn & 0x2000)
    213			asi = (regs->tstate >> 24);
    214		else
    215			asi = (insn >> 5);
    216		if ((asi & 0xf2) == 0x82) {
    217			if (insn & 0x1000000) {
    218				handle_ldf_stq(insn, regs);
    219			} else {
    220				/* This was a non-faulting load. Just clear the
    221				 * destination register(s) and continue with the next
    222				 * instruction. -jj
    223				 */
    224				handle_ld_nf(insn, regs);
    225			}
    226			return;
    227		}
    228	}
    229		
    230	/* Is this in ex_table? */
    231	if (regs->tstate & TSTATE_PRIV) {
    232		const struct exception_table_entry *entry;
    233
    234		entry = search_exception_tables(regs->tpc);
    235		if (entry) {
    236			regs->tpc = entry->fixup;
    237			regs->tnpc = regs->tpc + 4;
    238			return;
    239		}
    240	} else {
    241		/* The si_code was set to make clear whether
    242		 * this was a SEGV_MAPERR or SEGV_ACCERR fault.
    243		 */
    244		do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code);
    245		return;
    246	}
    247
    248cannot_handle:
    249	unhandled_fault (address, current, regs);
    250}
    251
    252static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs)
    253{
    254	static int times;
    255
    256	if (times++ < 10)
    257		printk(KERN_ERR "FAULT[%s:%d]: 32-bit process reports "
    258		       "64-bit TPC [%lx]\n",
    259		       current->comm, current->pid,
    260		       regs->tpc);
    261	show_regs(regs);
    262}
    263
    264asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
    265{
    266	enum ctx_state prev_state = exception_enter();
    267	struct mm_struct *mm = current->mm;
    268	struct vm_area_struct *vma;
    269	unsigned int insn = 0;
    270	int si_code, fault_code;
    271	vm_fault_t fault;
    272	unsigned long address, mm_rss;
    273	unsigned int flags = FAULT_FLAG_DEFAULT;
    274
    275	fault_code = get_thread_fault_code();
    276
    277	if (kprobe_page_fault(regs, 0))
    278		goto exit_exception;
    279
    280	si_code = SEGV_MAPERR;
    281	address = current_thread_info()->fault_address;
    282
    283	if ((fault_code & FAULT_CODE_ITLB) &&
    284	    (fault_code & FAULT_CODE_DTLB))
    285		BUG();
    286
    287	if (test_thread_flag(TIF_32BIT)) {
    288		if (!(regs->tstate & TSTATE_PRIV)) {
    289			if (unlikely((regs->tpc >> 32) != 0)) {
    290				bogus_32bit_fault_tpc(regs);
    291				goto intr_or_no_mm;
    292			}
    293		}
    294		if (unlikely((address >> 32) != 0))
    295			goto intr_or_no_mm;
    296	}
    297
    298	if (regs->tstate & TSTATE_PRIV) {
    299		unsigned long tpc = regs->tpc;
    300
    301		/* Sanity check the PC. */
    302		if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) ||
    303		    (tpc >= MODULES_VADDR && tpc < MODULES_END)) {
    304			/* Valid, no problems... */
    305		} else {
    306			bad_kernel_pc(regs, address);
    307			goto exit_exception;
    308		}
    309	} else
    310		flags |= FAULT_FLAG_USER;
    311
    312	/*
    313	 * If we're in an interrupt or have no user
    314	 * context, we must not take the fault..
    315	 */
    316	if (faulthandler_disabled() || !mm)
    317		goto intr_or_no_mm;
    318
    319	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
    320
    321	if (!mmap_read_trylock(mm)) {
    322		if ((regs->tstate & TSTATE_PRIV) &&
    323		    !search_exception_tables(regs->tpc)) {
    324			insn = get_fault_insn(regs, insn);
    325			goto handle_kernel_fault;
    326		}
    327
    328retry:
    329		mmap_read_lock(mm);
    330	}
    331
    332	if (fault_code & FAULT_CODE_BAD_RA)
    333		goto do_sigbus;
    334
    335	vma = find_vma(mm, address);
    336	if (!vma)
    337		goto bad_area;
    338
    339	/* Pure DTLB misses do not tell us whether the fault causing
    340	 * load/store/atomic was a write or not, it only says that there
    341	 * was no match.  So in such a case we (carefully) read the
    342	 * instruction to try and figure this out.  It's an optimization
    343	 * so it's ok if we can't do this.
    344	 *
    345	 * Special hack, window spill/fill knows the exact fault type.
    346	 */
    347	if (((fault_code &
    348	      (FAULT_CODE_DTLB | FAULT_CODE_WRITE | FAULT_CODE_WINFIXUP)) == FAULT_CODE_DTLB) &&
    349	    (vma->vm_flags & VM_WRITE) != 0) {
    350		insn = get_fault_insn(regs, 0);
    351		if (!insn)
    352			goto continue_fault;
    353		/* All loads, stores and atomics have bits 30 and 31 both set
    354		 * in the instruction.  Bit 21 is set in all stores, but we
    355		 * have to avoid prefetches which also have bit 21 set.
    356		 */
    357		if ((insn & 0xc0200000) == 0xc0200000 &&
    358		    (insn & 0x01780000) != 0x01680000) {
    359			/* Don't bother updating thread struct value,
    360			 * because update_mmu_cache only cares which tlb
    361			 * the access came from.
    362			 */
    363			fault_code |= FAULT_CODE_WRITE;
    364		}
    365	}
    366continue_fault:
    367
    368	if (vma->vm_start <= address)
    369		goto good_area;
    370	if (!(vma->vm_flags & VM_GROWSDOWN))
    371		goto bad_area;
    372	if (!(fault_code & FAULT_CODE_WRITE)) {
    373		/* Non-faulting loads shouldn't expand stack. */
    374		insn = get_fault_insn(regs, insn);
    375		if ((insn & 0xc0800000) == 0xc0800000) {
    376			unsigned char asi;
    377
    378			if (insn & 0x2000)
    379				asi = (regs->tstate >> 24);
    380			else
    381				asi = (insn >> 5);
    382			if ((asi & 0xf2) == 0x82)
    383				goto bad_area;
    384		}
    385	}
    386	if (expand_stack(vma, address))
    387		goto bad_area;
    388	/*
    389	 * Ok, we have a good vm_area for this memory access, so
    390	 * we can handle it..
    391	 */
    392good_area:
    393	si_code = SEGV_ACCERR;
    394
    395	/* If we took a ITLB miss on a non-executable page, catch
    396	 * that here.
    397	 */
    398	if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) {
    399		WARN(address != regs->tpc,
    400		     "address (%lx) != regs->tpc (%lx)\n", address, regs->tpc);
    401		WARN_ON(regs->tstate & TSTATE_PRIV);
    402		goto bad_area;
    403	}
    404
    405	if (fault_code & FAULT_CODE_WRITE) {
    406		if (!(vma->vm_flags & VM_WRITE))
    407			goto bad_area;
    408
    409		/* Spitfire has an icache which does not snoop
    410		 * processor stores.  Later processors do...
    411		 */
    412		if (tlb_type == spitfire &&
    413		    (vma->vm_flags & VM_EXEC) != 0 &&
    414		    vma->vm_file != NULL)
    415			set_thread_fault_code(fault_code |
    416					      FAULT_CODE_BLKCOMMIT);
    417
    418		flags |= FAULT_FLAG_WRITE;
    419	} else {
    420		/* Allow reads even for write-only mappings */
    421		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
    422			goto bad_area;
    423	}
    424
    425	fault = handle_mm_fault(vma, address, flags, regs);
    426
    427	if (fault_signal_pending(fault, regs))
    428		goto exit_exception;
    429
    430	if (unlikely(fault & VM_FAULT_ERROR)) {
    431		if (fault & VM_FAULT_OOM)
    432			goto out_of_memory;
    433		else if (fault & VM_FAULT_SIGSEGV)
    434			goto bad_area;
    435		else if (fault & VM_FAULT_SIGBUS)
    436			goto do_sigbus;
    437		BUG();
    438	}
    439
    440	if (fault & VM_FAULT_RETRY) {
    441		flags |= FAULT_FLAG_TRIED;
    442
    443		/* No need to mmap_read_unlock(mm) as we would
    444		 * have already released it in __lock_page_or_retry
    445		 * in mm/filemap.c.
    446		 */
    447
    448		goto retry;
    449	}
    450	mmap_read_unlock(mm);
    451
    452	mm_rss = get_mm_rss(mm);
    453#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
    454	mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
    455#endif
    456	if (unlikely(mm_rss >
    457		     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
    458		tsb_grow(mm, MM_TSB_BASE, mm_rss);
    459#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
    460	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
    461	mm_rss *= REAL_HPAGE_PER_HPAGE;
    462	if (unlikely(mm_rss >
    463		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
    464		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
    465			tsb_grow(mm, MM_TSB_HUGE, mm_rss);
    466		else
    467			hugetlb_setup(regs);
    468
    469	}
    470#endif
    471exit_exception:
    472	exception_exit(prev_state);
    473	return;
    474
    475	/*
    476	 * Something tried to access memory that isn't in our memory map..
    477	 * Fix it, but check if it's kernel or user first..
    478	 */
    479bad_area:
    480	insn = get_fault_insn(regs, insn);
    481	mmap_read_unlock(mm);
    482
    483handle_kernel_fault:
    484	do_kernel_fault(regs, si_code, fault_code, insn, address);
    485	goto exit_exception;
    486
    487/*
    488 * We ran out of memory, or some other thing happened to us that made
    489 * us unable to handle the page fault gracefully.
    490 */
    491out_of_memory:
    492	insn = get_fault_insn(regs, insn);
    493	mmap_read_unlock(mm);
    494	if (!(regs->tstate & TSTATE_PRIV)) {
    495		pagefault_out_of_memory();
    496		goto exit_exception;
    497	}
    498	goto handle_kernel_fault;
    499
    500intr_or_no_mm:
    501	insn = get_fault_insn(regs, 0);
    502	goto handle_kernel_fault;
    503
    504do_sigbus:
    505	insn = get_fault_insn(regs, insn);
    506	mmap_read_unlock(mm);
    507
    508	/*
    509	 * Send a sigbus, regardless of whether we were in kernel
    510	 * or user mode.
    511	 */
    512	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code);
    513
    514	/* Kernel mode? Handle exceptions or die */
    515	if (regs->tstate & TSTATE_PRIV)
    516		goto handle_kernel_fault;
    517}