cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

slb.c (22865B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * PowerPC64 SLB support.
      4 *
      5 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
      6 * Based on earlier code written by:
      7 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
      8 *    Copyright (c) 2001 Dave Engebretsen
      9 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
     10 */
     11
     12#include <asm/interrupt.h>
     13#include <asm/mmu.h>
     14#include <asm/mmu_context.h>
     15#include <asm/paca.h>
     16#include <asm/ppc-opcode.h>
     17#include <asm/cputable.h>
     18#include <asm/cacheflush.h>
     19#include <asm/smp.h>
     20#include <linux/compiler.h>
     21#include <linux/context_tracking.h>
     22#include <linux/mm_types.h>
     23#include <linux/pgtable.h>
     24
     25#include <asm/udbg.h>
     26#include <asm/code-patching.h>
     27
     28#include "internal.h"
     29
     30
     31static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
     32
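/*
 * Boot-time switch: passing "stress_slb" on the kernel command line enables
 * a stress mode in which non-bolted SLB entries are flushed aggressively
 * (see switch_slb() and slb_insert_entry()) to exercise the SLB miss paths.
 */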
     33bool stress_slb_enabled __initdata;
     34
     35static int __init parse_stress_slb(char *p)
     36{
     37	stress_slb_enabled = true;
     38	return 0;
     39}
     40early_param("stress_slb", parse_stress_slb);
     41
     42__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
     43
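/*
 * Debug check (CONFIG_DEBUG_VM only): probe the SLB with slbfee. and warn
 * if an entry for @ea is present when it should not be, or vice versa.
 */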
     44static void assert_slb_presence(bool present, unsigned long ea)
     45{
     46#ifdef CONFIG_DEBUG_VM
     47	unsigned long tmp;
     48
     49	WARN_ON_ONCE(mfmsr() & MSR_EE);
     50
     51	if (!cpu_has_feature(CPU_FTR_ARCH_206))
     52		return;
     53
     54	/*
     55	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
     56	 * ignores all other bits from 0-27, so just clear them all.
     57	 */
     58	ea &= ~((1UL << SID_SHIFT) - 1);
     59	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
     60
     61	WARN_ON(present == (tmp == 0));
     62#endif
     63}
     64
     65static inline void slb_shadow_update(unsigned long ea, int ssize,
     66				     unsigned long flags,
     67				     enum slb_index index)
     68{
     69	struct slb_shadow *p = get_slb_shadow();
     70
     71	/*
     72	 * Clear the ESID first so the entry is not valid while we are
     73	 * updating it.  No write barriers are needed here, provided
     74	 * we only update the current CPU's SLB shadow buffer.
     75	 */
     76	WRITE_ONCE(p->save_area[index].esid, 0);
     77	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
     78	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
     79}
     80
     81static inline void slb_shadow_clear(enum slb_index index)
     82{
     83	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
     84}
     85
     86static inline void create_shadowed_slbe(unsigned long ea, int ssize,
     87					unsigned long flags,
     88					enum slb_index index)
     89{
     90	/*
     91	 * Updating the shadow buffer before writing the SLB ensures
     92	 * we don't get a stale entry here if we get preempted by PHYP
     93	 * between these two statements.
     94	 */
     95	slb_shadow_update(ea, ssize, flags, index);
     96
     97	assert_slb_presence(false, ea);
     98	asm volatile("slbmte  %0,%1" :
     99		     : "r" (mk_vsid_data(ea, ssize, flags)),
    100		       "r" (mk_esid_data(ea, ssize, index))
    101		     : "memory" );
    102}
    103
    104/*
    105 * Insert bolted entries into SLB (which may not be empty, so don't clear
    106 * slb_cache_ptr).
    107 */
    108void __slb_restore_bolted_realmode(void)
    109{
    110	struct slb_shadow *p = get_slb_shadow();
    111	enum slb_index index;
    112
    113	 /* No isync needed because realmode. */
    114	for (index = 0; index < SLB_NUM_BOLTED; index++) {
    115		asm volatile("slbmte  %0,%1" :
    116		     : "r" (be64_to_cpu(p->save_area[index].vsid)),
    117		       "r" (be64_to_cpu(p->save_area[index].esid)));
    118	}
    119
    120	assert_slb_presence(true, local_paca->kstack);
    121}
    122
    123/*
    124 * Insert the bolted entries into an empty SLB.
    125 */
    126void slb_restore_bolted_realmode(void)
    127{
    128	__slb_restore_bolted_realmode();
    129	get_paca()->slb_cache_ptr = 0;
    130
    131	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
    132	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
    133}
    134
    135/*
    136 * This flushes all SLB entries including 0, so it must be realmode.
    137 */
    138void slb_flush_all_realmode(void)
    139{
    140	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
    141}
    142
    143static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
    144{
    145	struct slb_shadow *p = get_slb_shadow();
    146	unsigned long ksp_esid_data, ksp_vsid_data;
    147	u32 ih;
    148
    149	/*
    150	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
    151	 * information created with Class=0 entries, which we use for kernel
    152	 * SLB entries (the SLB entries themselves are still invalidated).
    153	 *
    154	 * Older processors will ignore this optimisation. Over-invalidation
    155	 * is fine because we never rely on lookaside information existing.
    156	 */
    157	if (preserve_kernel_lookaside)
    158		ih = 1;
    159	else
    160		ih = 0;
    161
    162	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
    163	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
    164
    165	asm volatile(PPC_SLBIA(%0)"	\n"
    166		     "slbmte	%1, %2	\n"
    167		     :: "i" (ih),
    168			"r" (ksp_vsid_data),
    169			"r" (ksp_esid_data)
    170		     : "memory");
    171}
    172
    173/*
     174 * This flushes non-bolted entries; it can be run in virtual mode. Must
    175 * be called with interrupts disabled.
    176 */
    177void slb_flush_and_restore_bolted(void)
    178{
    179	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
    180
    181	WARN_ON(!irqs_disabled());
    182
    183	/*
    184	 * We can't take a PMU exception in the following code, so hard
    185	 * disable interrupts.
    186	 */
    187	hard_irq_disable();
    188
    189	isync();
    190	__slb_flush_and_restore_bolted(false);
    191	isync();
    192
    193	assert_slb_presence(true, get_paca()->kstack);
    194
    195	get_paca()->slb_cache_ptr = 0;
    196
    197	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
    198	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
    199}
    200
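/*
 * Snapshot the hardware SLB with slbmfee/slbmfev into @slb_ptr, and save
 * slb_cache_ptr so slb_dump_contents() can report the cache state later.
 */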
    201void slb_save_contents(struct slb_entry *slb_ptr)
    202{
    203	int i;
    204	unsigned long e, v;
    205
    206	/* Save slb_cache_ptr value. */
    207	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
    208
    209	if (!slb_ptr)
    210		return;
    211
    212	for (i = 0; i < mmu_slb_size; i++) {
    213		asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
    214		asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
    215		slb_ptr->esid = e;
    216		slb_ptr->vsid = v;
    217		slb_ptr++;
    218	}
    219}
    220
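/*
 * Dump the SLB entries captured by slb_save_contents(), decoding segment
 * size, ESID, VSID and LLP, plus the SLB cache state on pre-ARCH_300 CPUs.
 */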
    221void slb_dump_contents(struct slb_entry *slb_ptr)
    222{
    223	int i, n;
    224	unsigned long e, v;
    225	unsigned long llp;
    226
    227	if (!slb_ptr)
    228		return;
    229
    230	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
    231
    232	for (i = 0; i < mmu_slb_size; i++) {
    233		e = slb_ptr->esid;
    234		v = slb_ptr->vsid;
    235		slb_ptr++;
    236
    237		if (!e && !v)
    238			continue;
    239
    240		pr_err("%02d %016lx %016lx %s\n", i, e, v,
    241				(e & SLB_ESID_V) ? "VALID" : "NOT VALID");
    242
    243		if (!(e & SLB_ESID_V))
    244			continue;
    245
    246		llp = v & SLB_VSID_LLP;
    247		if (v & SLB_VSID_B_1T) {
    248			pr_err("     1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
    249			       GET_ESID_1T(e),
    250			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
    251		} else {
    252			pr_err("   256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
    253			       GET_ESID(e),
    254			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
    255		}
    256	}
    257
    258	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
    259		/* RR is not so useful as it's often not used for allocation */
    260		pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);
    261
     262		/* Dump slb cache entries as well. */
    263		pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
    264		pr_err("Valid SLB cache entries:\n");
    265		n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
    266		for (i = 0; i < n; i++)
    267			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
    268		pr_err("Rest of SLB cache entries:\n");
    269		for (i = n; i < SLB_CACHE_ENTRIES; i++)
    270			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
    271	}
    272}
    273
    274void slb_vmalloc_update(void)
    275{
    276	/*
     277	 * vmalloc is not bolted, so we just have to flush the non-bolted entries.
    278	 */
    279	slb_flush_and_restore_bolted();
    280}
    281
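/*
 * The per-thread SLB preload cache is a small ring buffer of user ESIDs
 * (slb_preload_esid[], slb_preload_tail, slb_preload_nr) that switch_slb()
 * replays into the SLB when switching to the thread's mm.
 */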
    282static bool preload_hit(struct thread_info *ti, unsigned long esid)
    283{
    284	unsigned char i;
    285
    286	for (i = 0; i < ti->slb_preload_nr; i++) {
    287		unsigned char idx;
    288
    289		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
    290		if (esid == ti->slb_preload_esid[idx])
    291			return true;
    292	}
    293	return false;
    294}
    295
    296static bool preload_add(struct thread_info *ti, unsigned long ea)
    297{
    298	unsigned char idx;
    299	unsigned long esid;
    300
    301	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
    302		/* EAs are stored >> 28 so 256MB segments don't need clearing */
    303		if (ea & ESID_MASK_1T)
    304			ea &= ESID_MASK_1T;
    305	}
    306
    307	esid = ea >> SID_SHIFT;
    308
    309	if (preload_hit(ti, esid))
    310		return false;
    311
    312	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
    313	ti->slb_preload_esid[idx] = esid;
    314	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
    315		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
    316	else
    317		ti->slb_preload_nr++;
    318
    319	return true;
    320}
    321
    322static void preload_age(struct thread_info *ti)
    323{
    324	if (!ti->slb_preload_nr)
    325		return;
    326	ti->slb_preload_nr--;
    327	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
    328}
    329
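/*
 * Called on exec: preload SLB entries for the segment most executables are
 * linked at (0x10000000) and for mm->mmap_base (libraries and mmaps).
 */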
    330void slb_setup_new_exec(void)
    331{
    332	struct thread_info *ti = current_thread_info();
    333	struct mm_struct *mm = current->mm;
    334	unsigned long exec = 0x10000000;
    335
    336	WARN_ON(irqs_disabled());
    337
    338	/*
     339	 * The preload cache can only be used to determine whether an SLB
     340	 * entry exists as long as it does not overflow.
    341	 */
    342	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
    343		return;
    344
    345	hard_irq_disable();
    346
    347	/*
     348	 * We have no good place to clear the slb preload cache on exec;
     349	 * flush_thread is about the earliest arch hook, but that happens
     350	 * after we switch to the mm and have already preloaded the SLBEs.
     351	 *
     352	 * For the most part it's probably okay to use entries from the
     353	 * previous exec; they will age out if unused. It may turn out to
    354	 * be an advantage to clear the cache before switching to it,
    355	 * however.
    356	 */
    357
    358	/*
     359	 * Preload some userspace segments into the SLB.
     360	 * Almost all 32- and 64-bit PowerPC executables are linked at
    361	 * 0x10000000 so it makes sense to preload this segment.
    362	 */
    363	if (!is_kernel_addr(exec)) {
    364		if (preload_add(ti, exec))
    365			slb_allocate_user(mm, exec);
    366	}
    367
    368	/* Libraries and mmaps. */
    369	if (!is_kernel_addr(mm->mmap_base)) {
    370		if (preload_add(ti, mm->mmap_base))
    371			slb_allocate_user(mm, mm->mmap_base);
    372	}
    373
    374	/* see switch_slb */
    375	asm volatile("isync" : : : "memory");
    376
    377	local_irq_enable();
    378}
    379
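/*
 * Preload SLB entries for a new user context: the entry point, the initial
 * stack pointer, and the bottom of the heap (mm->start_brk).
 */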
    380void preload_new_slb_context(unsigned long start, unsigned long sp)
    381{
    382	struct thread_info *ti = current_thread_info();
    383	struct mm_struct *mm = current->mm;
    384	unsigned long heap = mm->start_brk;
    385
    386	WARN_ON(irqs_disabled());
    387
    388	/* see above */
    389	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
    390		return;
    391
    392	hard_irq_disable();
    393
    394	/* Userspace entry address. */
    395	if (!is_kernel_addr(start)) {
    396		if (preload_add(ti, start))
    397			slb_allocate_user(mm, start);
    398	}
    399
    400	/* Top of stack, grows down. */
    401	if (!is_kernel_addr(sp)) {
    402		if (preload_add(ti, sp))
    403			slb_allocate_user(mm, sp);
    404	}
    405
    406	/* Bottom of heap, grows up. */
    407	if (heap && !is_kernel_addr(heap)) {
    408		if (preload_add(ti, heap))
    409			slb_allocate_user(mm, heap);
    410	}
    411
    412	/* see switch_slb */
    413	asm volatile("isync" : : : "memory");
    414
    415	local_irq_enable();
    416}
    417
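/*
 * slbie one cached kernel entry (stress_slb mode only), skipping the
 * segment that contains the current kernel stack.
 */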
    418static void slb_cache_slbie_kernel(unsigned int index)
    419{
    420	unsigned long slbie_data = get_paca()->slb_cache[index];
    421	unsigned long ksp = get_paca()->kstack;
    422
    423	slbie_data <<= SID_SHIFT;
    424	slbie_data |= 0xc000000000000000ULL;
    425	if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
    426		return;
    427	slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
    428
    429	asm volatile("slbie %0" : : "r" (slbie_data));
    430}
    431
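/* slbie one cached user entry; user SLB entries have Class=1 (SLBIE_C). */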
    432static void slb_cache_slbie_user(unsigned int index)
    433{
    434	unsigned long slbie_data = get_paca()->slb_cache[index];
    435
    436	slbie_data <<= SID_SHIFT;
    437	slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
    438	slbie_data |= SLBIE_C; /* user slbs have C=1 */
    439
    440	asm volatile("slbie %0" : : "r" (slbie_data));
    441}
    442
    443/* Flush all user entries from the segment table of the current processor. */
    444void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
    445{
    446	struct thread_info *ti = task_thread_info(tsk);
    447	unsigned char i;
    448
    449	/*
    450	 * We need interrupts hard-disabled here, not just soft-disabled,
    451	 * so that a PMU interrupt can't occur, which might try to access
     452	 * user memory (to get a stack trace) and possibly cause an SLB miss
    453	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
    454	 */
    455	hard_irq_disable();
    456	isync();
    457	if (stress_slb()) {
    458		__slb_flush_and_restore_bolted(false);
    459		isync();
    460		get_paca()->slb_cache_ptr = 0;
    461		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
    462
    463	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
    464		/*
    465		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
    466		 * associated lookaside structures, which matches what
    467		 * switch_slb wants. So ARCH_300 does not use the slb
    468		 * cache.
    469		 */
    470		asm volatile(PPC_SLBIA(3));
    471
    472	} else {
    473		unsigned long offset = get_paca()->slb_cache_ptr;
    474
    475		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
    476		    offset <= SLB_CACHE_ENTRIES) {
    477			/*
    478			 * Could assert_slb_presence(true) here, but
    479			 * hypervisor or machine check could have come
    480			 * in and removed the entry at this point.
    481			 */
    482
    483			for (i = 0; i < offset; i++)
    484				slb_cache_slbie_user(i);
    485
    486			/* Workaround POWER5 < DD2.1 issue */
    487			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
    488				slb_cache_slbie_user(0);
    489
    490		} else {
    491			/* Flush but retain kernel lookaside information */
    492			__slb_flush_and_restore_bolted(true);
    493			isync();
    494
    495			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
    496		}
    497
    498		get_paca()->slb_cache_ptr = 0;
    499	}
    500	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
    501
    502	copy_mm_to_paca(mm);
    503
    504	/*
    505	 * We gradually age out SLBs after a number of context switches to
    506	 * reduce reload overhead of unused entries (like we do with FP/VEC
    507	 * reload). Each time we wrap 256 switches, take an entry out of the
    508	 * SLB preload cache.
    509	 */
    510	tsk->thread.load_slb++;
    511	if (!tsk->thread.load_slb) {
    512		unsigned long pc = KSTK_EIP(tsk);
    513
    514		preload_age(ti);
    515		preload_add(ti, pc);
    516	}
    517
    518	for (i = 0; i < ti->slb_preload_nr; i++) {
    519		unsigned char idx;
    520		unsigned long ea;
    521
    522		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
    523		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
    524
    525		slb_allocate_user(mm, ea);
    526	}
    527
    528	/*
    529	 * Synchronize slbmte preloads with possible subsequent user memory
    530	 * address accesses by the kernel (user mode won't happen until
    531	 * rfid, which is safe).
    532	 */
    533	isync();
    534}
    535
    536void slb_set_size(u16 size)
    537{
    538	mmu_slb_size = size;
    539}
    540
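/*
 * Boot-time SLB setup for this CPU: cache the segment LLP encodings in the
 * PACA, invalidate the whole SLB, and recreate the bolted linear-mapping
 * (and, on secondaries, kernel-stack) entries.
 */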
    541void slb_initialize(void)
    542{
    543	unsigned long linear_llp, vmalloc_llp, io_llp;
    544	unsigned long lflags;
    545	static int slb_encoding_inited;
    546#ifdef CONFIG_SPARSEMEM_VMEMMAP
    547	unsigned long vmemmap_llp;
    548#endif
    549
    550	/* Prepare our SLB miss handler based on our page size */
    551	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
    552	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
    553	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
    554	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
    555#ifdef CONFIG_SPARSEMEM_VMEMMAP
    556	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
    557#endif
    558	if (!slb_encoding_inited) {
    559		slb_encoding_inited = 1;
    560		pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
    561		pr_devel("SLB: io      LLP = %04lx\n", io_llp);
    562#ifdef CONFIG_SPARSEMEM_VMEMMAP
    563		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
    564#endif
    565	}
    566
    567	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
    568	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
    569	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
    570
    571	lflags = SLB_VSID_KERNEL | linear_llp;
    572
    573	/* Invalidate the entire SLB (even entry 0) & all the ERATS */
    574	asm volatile("isync":::"memory");
    575	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
    576	asm volatile("isync; slbia; isync":::"memory");
    577	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
    578
    579	/*
    580	 * For the boot cpu, we're running on the stack in init_thread_union,
    581	 * which is in the first segment of the linear mapping, and also
    582	 * get_paca()->kstack hasn't been initialized yet.
    583	 * For secondary cpus, we need to bolt the kernel stack entry now.
    584	 */
    585	slb_shadow_clear(KSTACK_INDEX);
    586	if (raw_smp_processor_id() != boot_cpuid &&
    587	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
    588		create_shadowed_slbe(get_paca()->kstack,
    589				     mmu_kernel_ssize, lflags, KSTACK_INDEX);
    590
    591	asm volatile("isync":::"memory");
    592}
    593
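/*
 * Record a newly inserted user ESID in the PACA slb_cache so switch_slb()
 * can slbie just those entries; if the cache overflows, bump the pointer
 * past SLB_CACHE_ENTRIES so switch_slb() falls back to a full flush.
 */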
    594static void slb_cache_update(unsigned long esid_data)
    595{
    596	int slb_cache_index;
    597
    598	if (cpu_has_feature(CPU_FTR_ARCH_300))
    599		return; /* ISAv3.0B and later does not use slb_cache */
    600
    601	if (stress_slb())
    602		return;
    603
    604	/*
    605	 * Now update slb cache entries
    606	 */
    607	slb_cache_index = local_paca->slb_cache_ptr;
    608	if (slb_cache_index < SLB_CACHE_ENTRIES) {
    609		/*
    610		 * We have space in slb cache for optimized switch_slb().
    611		 * Top 36 bits from esid_data as per ISA
    612		 */
    613		local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
    614		local_paca->slb_cache_ptr++;
    615	} else {
    616		/*
    617		 * Our cache is full and the current cache content strictly
    618		 * doesn't indicate the active SLB contents. Bump the ptr
    619		 * so that switch_slb() will ignore the cache.
    620		 */
    621		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
    622	}
    623}
    624
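/*
 * Choose an SLB slot for a new entry: prefer a free bit in slb_used_bitmap,
 * otherwise fall back to round-robin replacement via stab_rr, which never
 * selects a bolted slot.
 */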
    625static enum slb_index alloc_slb_index(bool kernel)
    626{
    627	enum slb_index index;
    628
    629	/*
     630	 * The allocation bitmaps can become out of sync with the SLB
     631	 * when the _switch code does an slbie while bolting a new stack
     632	 * segment, which must not be anywhere else in the SLB. This leaves
    633	 * a kernel allocated entry that is unused in the SLB. With very
    634	 * large systems or small segment sizes, the bitmaps could slowly
    635	 * fill with these entries. They will eventually be cleared out
    636	 * by the round robin allocator in that case, so it's probably not
    637	 * worth accounting for.
    638	 */
    639
    640	/*
     641	 * SLBs beyond 32 entries are allocated with stab_rr only.
     642	 * POWER7/8/9 have 32 SLB entries; this could be expanded if a
    643	 * future CPU has more.
    644	 */
    645	if (local_paca->slb_used_bitmap != U32_MAX) {
    646		index = ffz(local_paca->slb_used_bitmap);
    647		local_paca->slb_used_bitmap |= 1U << index;
    648		if (kernel)
    649			local_paca->slb_kern_bitmap |= 1U << index;
    650	} else {
    651		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
    652		index = local_paca->stab_rr;
    653		if (index < (mmu_slb_size - 1))
    654			index++;
    655		else
    656			index = SLB_NUM_BOLTED;
    657		local_paca->stab_rr = index;
    658		if (index < 32) {
    659			if (kernel)
    660				local_paca->slb_kern_bitmap |= 1U << index;
    661			else
    662				local_paca->slb_kern_bitmap &= ~(1U << index);
    663		}
    664	}
    665	BUG_ON(index < SLB_NUM_BOLTED);
    666
    667	return index;
    668}
    669
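/*
 * Compute the VSID for @ea, allocate a slot, and slbmte the new entry.
 * User insertions are recorded in the PACA slb_cache for later targeted
 * invalidation; under stress_slb(), inserted kernel entries are tracked
 * instead.
 */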
    670static long slb_insert_entry(unsigned long ea, unsigned long context,
    671				unsigned long flags, int ssize, bool kernel)
    672{
    673	unsigned long vsid;
    674	unsigned long vsid_data, esid_data;
    675	enum slb_index index;
    676
    677	vsid = get_vsid(context, ea, ssize);
    678	if (!vsid)
    679		return -EFAULT;
    680
    681	/*
    682	 * There must not be a kernel SLB fault in alloc_slb_index or before
    683	 * slbmte here or the allocation bitmaps could get out of whack with
    684	 * the SLB.
    685	 *
    686	 * User SLB faults or preloads take this path which might get inlined
    687	 * into the caller, so add compiler barriers here to ensure unsafe
    688	 * memory accesses do not come between.
    689	 */
    690	barrier();
    691
    692	index = alloc_slb_index(kernel);
    693
    694	vsid_data = __mk_vsid_data(vsid, ssize, flags);
    695	esid_data = mk_esid_data(ea, ssize, index);
    696
    697	/*
    698	 * No need for an isync before or after this slbmte. The exception
    699	 * we enter with and the rfid we exit with are context synchronizing.
    700	 * User preloads should add isync afterwards in case the kernel
    701	 * accesses user memory before it returns to userspace with rfid.
    702	 */
    703	assert_slb_presence(false, ea);
    704	if (stress_slb()) {
    705		int slb_cache_index = local_paca->slb_cache_ptr;
    706
    707		/*
     708		 * stress_slb() does not use the slb cache, so repurpose it as a
     709		 * cache of inserted (non-bolted) kernel SLB entries. All
     710		 * non-bolted kernel entries are flushed on any user fault,
     711		 * or if there are already 3 non-bolted kernel entries.
    712		 */
    713		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
    714		if (!kernel || slb_cache_index == 3) {
    715			int i;
    716
    717			for (i = 0; i < slb_cache_index; i++)
    718				slb_cache_slbie_kernel(i);
    719			slb_cache_index = 0;
    720		}
    721
    722		if (kernel)
    723			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
    724		local_paca->slb_cache_ptr = slb_cache_index;
    725	}
    726	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
    727
    728	barrier();
    729
    730	if (!kernel)
    731		slb_cache_update(esid_data);
    732
    733	return 0;
    734}
    735
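/*
 * Handle an SLB miss on a kernel address: pick segment flags based on the
 * region (linear map, vmemmap, vmalloc or I/O) and insert a kernel entry.
 */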
    736static long slb_allocate_kernel(unsigned long ea, unsigned long id)
    737{
    738	unsigned long context;
    739	unsigned long flags;
    740	int ssize;
    741
    742	if (id == LINEAR_MAP_REGION_ID) {
    743
     744		/* We only support up to H_MAX_PHYSMEM_BITS */
    745		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
    746			return -EFAULT;
    747
    748		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
    749
    750#ifdef CONFIG_SPARSEMEM_VMEMMAP
    751	} else if (id == VMEMMAP_REGION_ID) {
    752
    753		if (ea >= H_VMEMMAP_END)
    754			return -EFAULT;
    755
    756		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
    757#endif
    758	} else if (id == VMALLOC_REGION_ID) {
    759
    760		if (ea >= H_VMALLOC_END)
    761			return -EFAULT;
    762
    763		flags = local_paca->vmalloc_sllp;
    764
    765	} else if (id == IO_REGION_ID) {
    766
    767		if (ea >= H_KERN_IO_END)
    768			return -EFAULT;
    769
    770		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
    771
    772	} else {
    773		return -EFAULT;
    774	}
    775
    776	ssize = MMU_SEGSIZE_1T;
    777	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
    778		ssize = MMU_SEGSIZE_256M;
    779
    780	context = get_kernel_context(ea);
    781
    782	return slb_insert_entry(ea, context, flags, ssize, true);
    783}
    784
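/*
 * Handle an SLB miss (or a preload) for a user address: validate it against
 * the address-space limits, look up the context and slice page size, then
 * insert a user entry.
 */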
    785static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
    786{
    787	unsigned long context;
    788	unsigned long flags;
    789	int bpsize;
    790	int ssize;
    791
    792	/*
     793	 * Consider this a bad access if we take an SLB miss
     794	 * on an address above the addr limit.
    795	 */
    796	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
    797		return -EFAULT;
    798
    799	context = get_user_context(&mm->context, ea);
    800	if (!context)
    801		return -EFAULT;
    802
    803	if (unlikely(ea >= H_PGTABLE_RANGE)) {
    804		WARN_ON(1);
    805		return -EFAULT;
    806	}
    807
    808	ssize = user_segment_size(ea);
    809
    810	bpsize = get_slice_psize(mm, ea);
    811	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
    812
    813	return slb_insert_entry(ea, context, flags, ssize, false);
    814}
    815
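/*
 * Raw SLB miss handler. Dispatches to the kernel or user allocation path
 * based on the faulting region and returns 0 on success or a negative
 * error for the caller to turn into a fault.
 */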
    816DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
    817{
    818	unsigned long ea = regs->dar;
    819	unsigned long id = get_region_id(ea);
    820
    821	/* IRQs are not reconciled here, so can't check irqs_disabled */
    822	VM_WARN_ON(mfmsr() & MSR_EE);
    823
    824	if (regs_is_unrecoverable(regs))
    825		return -EINVAL;
    826
    827	/*
    828	 * SLB kernel faults must be very careful not to touch anything that is
    829	 * not bolted. E.g., PACA and global variables are okay, mm->context
    830	 * stuff is not. SLB user faults may access all of memory (and induce
    831	 * one recursive SLB kernel fault), so the kernel fault must not
    832	 * trample on the user fault state at those points.
    833	 */
    834
    835	/*
    836	 * This is a raw interrupt handler, for performance, so that
    837	 * fast_interrupt_return can be used. The handler must not touch local
    838	 * irq state, or schedule. We could test for usermode and upgrade to a
    839	 * normal process context (synchronous) interrupt for those, which
    840	 * would make them first-class kernel code and able to be traced and
     841	 * instrumented; although performance would suffer a bit, it would
    842	 * probably be a good tradeoff.
    843	 */
    844	if (id >= LINEAR_MAP_REGION_ID) {
    845		long err;
    846#ifdef CONFIG_DEBUG_VM
    847		/* Catch recursive kernel SLB faults. */
    848		BUG_ON(local_paca->in_kernel_slb_handler);
    849		local_paca->in_kernel_slb_handler = 1;
    850#endif
    851		err = slb_allocate_kernel(ea, id);
    852#ifdef CONFIG_DEBUG_VM
    853		local_paca->in_kernel_slb_handler = 0;
    854#endif
    855		return err;
    856	} else {
    857		struct mm_struct *mm = current->mm;
    858		long err;
    859
    860		if (unlikely(!mm))
    861			return -EFAULT;
    862
    863		err = slb_allocate_user(mm, ea);
    864		if (!err)
    865			preload_add(current_thread_info(), ea);
    866
    867		return err;
    868	}
    869}