cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

radix_tlb.c (42543B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * TLB flush routines for radix kernels.
      4 *
      5 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
      6 */
      7
      8#include <linux/mm.h>
      9#include <linux/hugetlb.h>
     10#include <linux/memblock.h>
     11#include <linux/mmu_context.h>
     12#include <linux/sched/mm.h>
     13#include <linux/debugfs.h>
     14
     15#include <asm/ppc-opcode.h>
     16#include <asm/tlb.h>
     17#include <asm/tlbflush.h>
     18#include <asm/trace.h>
     19#include <asm/cputhreads.h>
     20#include <asm/plpar_wrappers.h>
     21
     22#include "internal.h"
     23
     24/*
     25 * tlbiel instruction for radix, set invalidation
     26 * i.e., r=1 and is=01 or is=10 or is=11
     27 */
     28static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
     29					unsigned int pid,
     30					unsigned int ric, unsigned int prs)
     31{
     32	unsigned long rb;
     33	unsigned long rs;
     34
     35	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
     36	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
     37
     38	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
     39		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
     40		     : "memory");
     41}
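
As an aside, the RB/RS encoding above uses the kernel's IBM bit-numbering helpers (bit 0 is the most significant bit of the 64-bit register). Below is a minimal standalone sketch, with the PPC_BIT* macros reproduced locally under the assumption that they match the 64-bit definitions in arch/powerpc/include/asm/bitops.h, showing what tlbiel_radix_set_isa300() actually places in the two operands:

#include <stdio.h>

/* Local copies of the helpers used above (assumed to match the kernel's
 * 64-bit definitions; IBM numbering, bit 0 = MSB). */
#define PPC_BITLSHIFT(be)	(63 - (be))
#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))
#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))

int main(void)
{
	unsigned int set = 5, is = 3, pid = 42;

	/* Same arithmetic as tlbiel_radix_set_isa300(): the set index is
	 * shifted so its low-order bit lands at IBM bit 51, the IS field
	 * ends at bit 53, and the PID occupies IBM bits 0..31 of RS. */
	unsigned long rb = ((unsigned long)set << PPC_BITLSHIFT(51)) |
			   ((unsigned long)is << PPC_BITLSHIFT(53));
	unsigned long rs = (unsigned long)pid << PPC_BITLSHIFT(31);

	printf("rb = 0x%016lx, rs = 0x%016lx\n", rb, rs);
	return 0;
}
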
     42
     43static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
     44{
     45	unsigned int set;
     46
     47	asm volatile("ptesync": : :"memory");
     48
     49	/*
     50	 * Flush the first set of the TLB, and the entire Page Walk Cache
     51	 * and partition table entries. Then flush the remaining sets of the
     52	 * TLB.
     53	 */
     54
     55	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
     56		/* MSR[HV] should flush partition scope translations first. */
     57		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
     58
     59		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
     60			for (set = 1; set < num_sets; set++)
     61				tlbiel_radix_set_isa300(set, is, 0,
     62							RIC_FLUSH_TLB, 0);
     63		}
     64	}
     65
     66	/* Flush process scoped entries. */
     67	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
     68
     69	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
     70		for (set = 1; set < num_sets; set++)
     71			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
     72	}
     73
     74	ppc_after_tlbiel_barrier();
     75}
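
Purely to illustrate the ordering described in the comment above, here is a small userspace model (the 128-set count is an assumption matching POWER9_TLB_SETS_RADIX) that prints the sequence of tlbiel operations a hypervisor-mode, pre-ISA-3.1 core would issue:

#include <stdio.h>

int main(void)
{
	const unsigned int num_sets = 128;	/* assumed POWER9_TLB_SETS_RADIX */
	unsigned int set;

	/* Partition-scoped pass (prs = 0): set 0 with RIC_FLUSH_ALL covers
	 * the PWC and cached partition-table entries; the remaining sets
	 * only need the TLB flushed. */
	printf("prs=0 set=0 ric=ALL\n");
	for (set = 1; set < num_sets; set++)
		printf("prs=0 set=%u ric=TLB\n", set);

	/* Process-scoped pass (prs = 1), same shape. */
	printf("prs=1 set=0 ric=ALL\n");
	for (set = 1; set < num_sets; set++)
		printf("prs=1 set=%u ric=TLB\n", set);

	return 0;
}
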
     76
     77void radix__tlbiel_all(unsigned int action)
     78{
     79	unsigned int is;
     80
     81	switch (action) {
     82	case TLB_INVAL_SCOPE_GLOBAL:
     83		is = 3;
     84		break;
     85	case TLB_INVAL_SCOPE_LPID:
     86		is = 2;
     87		break;
     88	default:
     89		BUG();
     90	}
     91
     92	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
     93		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
     94	else
     95		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
     96
     97	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
     98}
     99
    100static __always_inline void __tlbiel_pid(unsigned long pid, int set,
    101				unsigned long ric)
    102{
    103	unsigned long rb,rs,prs,r;
    104
    105	rb = PPC_BIT(53); /* IS = 1 */
    106	rb |= set << PPC_BITLSHIFT(51);
    107	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
    108	prs = 1; /* process scoped */
    109	r = 1;   /* radix format */
    110
    111	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
    112		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    113	trace_tlbie(0, 1, rb, rs, ric, prs, r);
    114}
    115
    116static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
    117{
    118	unsigned long rb,rs,prs,r;
    119
    120	rb = PPC_BIT(53); /* IS = 1 */
    121	rs = pid << PPC_BITLSHIFT(31);
    122	prs = 1; /* process scoped */
    123	r = 1;   /* radix format */
    124
    125	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    126		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    127	trace_tlbie(0, 0, rb, rs, ric, prs, r);
    128}
    129
    130static __always_inline void __tlbie_pid_lpid(unsigned long pid,
    131					     unsigned long lpid,
    132					     unsigned long ric)
    133{
    134	unsigned long rb, rs, prs, r;
    135
    136	rb = PPC_BIT(53); /* IS = 1 */
    137	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
    138	prs = 1; /* process scoped */
    139	r = 1;   /* radix format */
    140
    141	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    142		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    143	trace_tlbie(0, 0, rb, rs, ric, prs, r);
    144}
    145static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
    146{
    147	unsigned long rb,rs,prs,r;
    148
    149	rb = PPC_BIT(52); /* IS = 2 */
    150	rs = lpid;
    151	prs = 0; /* partition scoped */
    152	r = 1;   /* radix format */
    153
    154	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    155		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    156	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
    157}
    158
    159static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
    160{
    161	unsigned long rb,rs,prs,r;
    162
    163	rb = PPC_BIT(52); /* IS = 2 */
    164	rs = lpid;
    165	prs = 1; /* process scoped */
    166	r = 1;   /* radix format */
    167
    168	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    169		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    170	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
    171}
    172
    173static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
    174					unsigned long ap, unsigned long ric)
    175{
    176	unsigned long rb,rs,prs,r;
    177
    178	rb = va & ~(PPC_BITMASK(52, 63));
    179	rb |= ap << PPC_BITLSHIFT(58);
    180	rs = pid << PPC_BITLSHIFT(31);
    181	prs = 1; /* process scoped */
    182	r = 1;   /* radix format */
    183
    184	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
    185		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    186	trace_tlbie(0, 1, rb, rs, ric, prs, r);
    187}
    188
    189static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
    190				       unsigned long ap, unsigned long ric)
    191{
    192	unsigned long rb,rs,prs,r;
    193
    194	rb = va & ~(PPC_BITMASK(52, 63));
    195	rb |= ap << PPC_BITLSHIFT(58);
    196	rs = pid << PPC_BITLSHIFT(31);
    197	prs = 1; /* process scoped */
    198	r = 1;   /* radix format */
    199
    200	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    201		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    202	trace_tlbie(0, 0, rb, rs, ric, prs, r);
    203}
    204
    205static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
    206					    unsigned long lpid,
    207					    unsigned long ap, unsigned long ric)
    208{
    209	unsigned long rb, rs, prs, r;
    210
    211	rb = va & ~(PPC_BITMASK(52, 63));
    212	rb |= ap << PPC_BITLSHIFT(58);
    213	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
    214	prs = 1; /* process scoped */
    215	r = 1;   /* radix format */
    216
    217	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    218		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    219	trace_tlbie(0, 0, rb, rs, ric, prs, r);
    220}
    221
    222static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
    223					    unsigned long ap, unsigned long ric)
    224{
    225	unsigned long rb,rs,prs,r;
    226
    227	rb = va & ~(PPC_BITMASK(52, 63));
    228	rb |= ap << PPC_BITLSHIFT(58);
    229	rs = lpid;
    230	prs = 0; /* partition scoped */
    231	r = 1;   /* radix format */
    232
    233	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
    234		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
    235	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
    236}
    237
    238
    239static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
    240				  unsigned long ap)
    241{
    242	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    243		asm volatile("ptesync": : :"memory");
    244		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
    245	}
    246
    247	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    248		asm volatile("ptesync": : :"memory");
    249		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
    250	}
    251}
    252
    253static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
    254					unsigned long ap)
    255{
    256	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    257		asm volatile("ptesync": : :"memory");
    258		__tlbie_pid(0, RIC_FLUSH_TLB);
    259	}
    260
    261	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    262		asm volatile("ptesync": : :"memory");
    263		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
    264	}
    265}
    266
    267static inline void fixup_tlbie_va_range_lpid(unsigned long va,
    268					     unsigned long pid,
    269					     unsigned long lpid,
    270					     unsigned long ap)
    271{
    272	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    273		asm volatile("ptesync" : : : "memory");
    274		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
    275	}
    276
    277	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    278		asm volatile("ptesync" : : : "memory");
    279		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
    280	}
    281}
    282
    283static inline void fixup_tlbie_pid(unsigned long pid)
    284{
    285	/*
    286	 * We can use any address for the invalidation, pick one which is
    287	 * probably unused as an optimisation.
    288	 */
    289	unsigned long va = ((1UL << 52) - 1);
    290
    291	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    292		asm volatile("ptesync": : :"memory");
    293		__tlbie_pid(0, RIC_FLUSH_TLB);
    294	}
    295
    296	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    297		asm volatile("ptesync": : :"memory");
    298		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
    299	}
    300}
    301
    302static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
    303{
    304	/*
    305	 * We can use any address for the invalidation, pick one which is
    306	 * probably unused as an optimisation.
    307	 */
    308	unsigned long va = ((1UL << 52) - 1);
    309
    310	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    311		asm volatile("ptesync" : : : "memory");
    312		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
    313	}
    314
    315	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    316		asm volatile("ptesync" : : : "memory");
    317		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
    318				RIC_FLUSH_TLB);
    319	}
    320}
    321
    322static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
    323				       unsigned long ap)
    324{
    325	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    326		asm volatile("ptesync": : :"memory");
    327		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
    328	}
    329
    330	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    331		asm volatile("ptesync": : :"memory");
    332		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
    333	}
    334}
    335
    336static inline void fixup_tlbie_lpid(unsigned long lpid)
    337{
    338	/*
    339	 * We can use any address for the invalidation, pick one which is
    340	 * probably unused as an optimisation.
    341	 */
    342	unsigned long va = ((1UL << 52) - 1);
    343
    344	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
    345		asm volatile("ptesync": : :"memory");
    346		__tlbie_lpid(0, RIC_FLUSH_TLB);
    347	}
    348
    349	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
    350		asm volatile("ptesync": : :"memory");
    351		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
    352	}
    353}
    354
    355/*
    356 * We use 128 sets in radix mode and 256 sets in HPT mode.
    357 */
    358static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
    359{
    360	int set;
    361
    362	asm volatile("ptesync": : :"memory");
    363
    364	switch (ric) {
    365	case RIC_FLUSH_PWC:
    366
    367		/* For PWC, only one flush is needed */
    368		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
    369		ppc_after_tlbiel_barrier();
    370		return;
    371	case RIC_FLUSH_TLB:
    372		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
    373		break;
    374	case RIC_FLUSH_ALL:
    375	default:
    376		/*
    377		 * Flush the first set of the TLB, and if
    378		 * we're doing a RIC_FLUSH_ALL, also flush
    379		 * the entire Page Walk Cache.
    380		 */
    381		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
    382	}
    383
    384	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
    385		/* For the remaining sets, just flush the TLB */
    386		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
    387			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
    388	}
    389
    390	ppc_after_tlbiel_barrier();
    391	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
    392}
    393
    394static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
    395{
    396	asm volatile("ptesync": : :"memory");
    397
    398	/*
    399	 * Workaround the fact that the "ric" argument to __tlbie_pid
    400	 * must be a compile-time constant to match the "i" constraint
    401	 * in the asm statement.
    402	 */
    403	switch (ric) {
    404	case RIC_FLUSH_TLB:
    405		__tlbie_pid(pid, RIC_FLUSH_TLB);
    406		fixup_tlbie_pid(pid);
    407		break;
    408	case RIC_FLUSH_PWC:
    409		__tlbie_pid(pid, RIC_FLUSH_PWC);
    410		break;
    411	case RIC_FLUSH_ALL:
    412	default:
    413		__tlbie_pid(pid, RIC_FLUSH_ALL);
    414		fixup_tlbie_pid(pid);
    415	}
    416	asm volatile("eieio; tlbsync; ptesync": : :"memory");
    417}
    418
    419static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
    420				   unsigned long ric)
    421{
    422	asm volatile("ptesync" : : : "memory");
    423
    424	/*
    425	 * Workaround the fact that the "ric" argument to __tlbie_pid_lpid
    426	 * must be a compile-time constant to match the "i" constraint
    427	 * in the asm statement.
    428	 */
    429	switch (ric) {
    430	case RIC_FLUSH_TLB:
    431		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
    432		fixup_tlbie_pid_lpid(pid, lpid);
    433		break;
    434	case RIC_FLUSH_PWC:
    435		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
    436		break;
    437	case RIC_FLUSH_ALL:
    438	default:
    439		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
    440		fixup_tlbie_pid_lpid(pid, lpid);
    441	}
    442	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
    443}
    444struct tlbiel_pid {
    445	unsigned long pid;
    446	unsigned long ric;
    447};
    448
    449static void do_tlbiel_pid(void *info)
    450{
    451	struct tlbiel_pid *t = info;
    452
    453	if (t->ric == RIC_FLUSH_TLB)
    454		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
    455	else if (t->ric == RIC_FLUSH_PWC)
    456		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
    457	else
    458		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
    459}
    460
    461static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
    462				unsigned long pid, unsigned long ric)
    463{
    464	struct cpumask *cpus = mm_cpumask(mm);
    465	struct tlbiel_pid t = { .pid = pid, .ric = ric };
    466
    467	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
    468	/*
    469	 * We always want the CPU translations to be invalidated with tlbiel in
    470	 * these paths, so while coprocessors must use tlbie, we cannot
    471	 * optimise away the tlbiel component.
    472	 */
    473	if (atomic_read(&mm->context.copros) > 0)
    474		_tlbie_pid(pid, RIC_FLUSH_ALL);
    475}
    476
    477static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
    478{
    479	asm volatile("ptesync": : :"memory");
    480
    481	/*
    482	 * Workaround the fact that the "ric" argument to __tlbie_lpid
    483	 * must be a compile-time constant to match the "i" constraint
    484	 * in the asm statement.
    485	 */
    486	switch (ric) {
    487	case RIC_FLUSH_TLB:
    488		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
    489		fixup_tlbie_lpid(lpid);
    490		break;
    491	case RIC_FLUSH_PWC:
    492		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
    493		break;
    494	case RIC_FLUSH_ALL:
    495	default:
    496		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
    497		fixup_tlbie_lpid(lpid);
    498	}
    499	asm volatile("eieio; tlbsync; ptesync": : :"memory");
    500}
    501
    502static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
    503{
    504	/*
    505	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
    506	 * must be a compile-time constant to match the "i" constraint
    507	 * in the asm statement.
    508	 */
    509	switch (ric) {
    510	case RIC_FLUSH_TLB:
    511		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
    512		break;
    513	case RIC_FLUSH_PWC:
    514		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
    515		break;
    516	case RIC_FLUSH_ALL:
    517	default:
    518		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
    519	}
    520	fixup_tlbie_lpid(lpid);
    521	asm volatile("eieio; tlbsync; ptesync": : :"memory");
    522}
    523
    524static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
    525				    unsigned long pid, unsigned long page_size,
    526				    unsigned long psize)
    527{
    528	unsigned long addr;
    529	unsigned long ap = mmu_get_ap(psize);
    530
    531	for (addr = start; addr < end; addr += page_size)
    532		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
    533}
    534
    535static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
    536				       unsigned long psize, unsigned long ric)
    537{
    538	unsigned long ap = mmu_get_ap(psize);
    539
    540	asm volatile("ptesync": : :"memory");
    541	__tlbiel_va(va, pid, ap, ric);
    542	ppc_after_tlbiel_barrier();
    543}
    544
    545static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
    546				    unsigned long pid, unsigned long page_size,
    547				    unsigned long psize, bool also_pwc)
    548{
    549	asm volatile("ptesync": : :"memory");
    550	if (also_pwc)
    551		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
    552	__tlbiel_va_range(start, end, pid, page_size, psize);
    553	ppc_after_tlbiel_barrier();
    554}
    555
    556static inline void __tlbie_va_range(unsigned long start, unsigned long end,
    557				    unsigned long pid, unsigned long page_size,
    558				    unsigned long psize)
    559{
    560	unsigned long addr;
    561	unsigned long ap = mmu_get_ap(psize);
    562
    563	for (addr = start; addr < end; addr += page_size)
    564		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
    565
    566	fixup_tlbie_va_range(addr - page_size, pid, ap);
    567}
    568
    569static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
    570					 unsigned long pid, unsigned long lpid,
    571					 unsigned long page_size,
    572					 unsigned long psize)
    573{
    574	unsigned long addr;
    575	unsigned long ap = mmu_get_ap(psize);
    576
    577	for (addr = start; addr < end; addr += page_size)
    578		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
    579
    580	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
    581}
    582
    583static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
    584				      unsigned long psize, unsigned long ric)
    585{
    586	unsigned long ap = mmu_get_ap(psize);
    587
    588	asm volatile("ptesync": : :"memory");
    589	__tlbie_va(va, pid, ap, ric);
    590	fixup_tlbie_va(va, pid, ap);
    591	asm volatile("eieio; tlbsync; ptesync": : :"memory");
    592}
    593
    594struct tlbiel_va {
    595	unsigned long pid;
    596	unsigned long va;
    597	unsigned long psize;
    598	unsigned long ric;
    599};
    600
    601static void do_tlbiel_va(void *info)
    602{
    603	struct tlbiel_va *t = info;
    604
    605	if (t->ric == RIC_FLUSH_TLB)
    606		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
    607	else if (t->ric == RIC_FLUSH_PWC)
    608		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
    609	else
    610		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
    611}
    612
    613static inline void _tlbiel_va_multicast(struct mm_struct *mm,
    614				unsigned long va, unsigned long pid,
    615				unsigned long psize, unsigned long ric)
    616{
    617	struct cpumask *cpus = mm_cpumask(mm);
    618	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
    619	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
    620	if (atomic_read(&mm->context.copros) > 0)
    621		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
    622}
    623
    624struct tlbiel_va_range {
    625	unsigned long pid;
    626	unsigned long start;
    627	unsigned long end;
    628	unsigned long page_size;
    629	unsigned long psize;
    630	bool also_pwc;
    631};
    632
    633static void do_tlbiel_va_range(void *info)
    634{
    635	struct tlbiel_va_range *t = info;
    636
    637	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
    638				    t->psize, t->also_pwc);
    639}
    640
    641static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
    642			      unsigned long psize, unsigned long ric)
    643{
    644	unsigned long ap = mmu_get_ap(psize);
    645
    646	asm volatile("ptesync": : :"memory");
    647	__tlbie_lpid_va(va, lpid, ap, ric);
    648	fixup_tlbie_lpid_va(va, lpid, ap);
    649	asm volatile("eieio; tlbsync; ptesync": : :"memory");
    650}
    651
    652static inline void _tlbie_va_range(unsigned long start, unsigned long end,
    653				    unsigned long pid, unsigned long page_size,
    654				    unsigned long psize, bool also_pwc)
    655{
    656	asm volatile("ptesync": : :"memory");
    657	if (also_pwc)
    658		__tlbie_pid(pid, RIC_FLUSH_PWC);
    659	__tlbie_va_range(start, end, pid, page_size, psize);
    660	asm volatile("eieio; tlbsync; ptesync": : :"memory");
    661}
    662
    663static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
    664					unsigned long pid, unsigned long lpid,
    665					unsigned long page_size,
    666					unsigned long psize, bool also_pwc)
    667{
    668	asm volatile("ptesync" : : : "memory");
    669	if (also_pwc)
    670		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
    671	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
    672	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
    673}
    674
    675static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
    676				unsigned long start, unsigned long end,
    677				unsigned long pid, unsigned long page_size,
    678				unsigned long psize, bool also_pwc)
    679{
    680	struct cpumask *cpus = mm_cpumask(mm);
    681	struct tlbiel_va_range t = { .start = start, .end = end,
    682				.pid = pid, .page_size = page_size,
    683				.psize = psize, .also_pwc = also_pwc };
    684
    685	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
    686	if (atomic_read(&mm->context.copros) > 0)
    687		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
    688}
    689
    690/*
    691 * Base TLB flushing operations:
    692 *
    693 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
    694 *  - flush_tlb_page(vma, vmaddr) flushes one page
    695 *  - flush_tlb_range(vma, start, end) flushes a range of pages
    696 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
    697 *
    698 *  - local_* variants of page and mm only apply to the current
    699 *    processor
    700 */
    701void radix__local_flush_tlb_mm(struct mm_struct *mm)
    702{
    703	unsigned long pid;
    704
    705	preempt_disable();
    706	pid = mm->context.id;
    707	if (pid != MMU_NO_CONTEXT)
    708		_tlbiel_pid(pid, RIC_FLUSH_TLB);
    709	preempt_enable();
    710}
    711EXPORT_SYMBOL(radix__local_flush_tlb_mm);
    712
    713#ifndef CONFIG_SMP
    714void radix__local_flush_all_mm(struct mm_struct *mm)
    715{
    716	unsigned long pid;
    717
    718	preempt_disable();
    719	pid = mm->context.id;
    720	if (pid != MMU_NO_CONTEXT)
    721		_tlbiel_pid(pid, RIC_FLUSH_ALL);
    722	preempt_enable();
    723}
    724EXPORT_SYMBOL(radix__local_flush_all_mm);
    725
    726static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
    727{
    728	radix__local_flush_all_mm(mm);
    729}
    730#endif /* CONFIG_SMP */
    731
    732void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
    733				       int psize)
    734{
    735	unsigned long pid;
    736
    737	preempt_disable();
    738	pid = mm->context.id;
    739	if (pid != MMU_NO_CONTEXT)
    740		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
    741	preempt_enable();
    742}
    743
    744void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
    745{
    746#ifdef CONFIG_HUGETLB_PAGE
    747	/* need the return fix for nohash.c */
    748	if (is_vm_hugetlb_page(vma))
    749		return radix__local_flush_hugetlb_page(vma, vmaddr);
    750#endif
    751	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
    752}
    753EXPORT_SYMBOL(radix__local_flush_tlb_page);
    754
    755static bool mm_needs_flush_escalation(struct mm_struct *mm)
    756{
    757	/*
    758	 * P9 nest MMU has issues with the page walk cache
    759	 * caching PTEs and not flushing them properly when
    760	 * RIC = 0 for a PID/LPID invalidate
    761	 */
    762	if (atomic_read(&mm->context.copros) > 0)
    763		return true;
    764	return false;
    765}
    766
    767/*
    768 * If always_flush is true, then flush even if this CPU can't be removed
    769 * from mm_cpumask.
    770 */
    771void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
    772{
    773	unsigned long pid = mm->context.id;
    774	int cpu = smp_processor_id();
    775
    776	/*
    777	 * A kthread could have done a mmget_not_zero() after the flushing CPU
    778	 * checked mm_cpumask, and be in the process of kthread_use_mm when
    779	 * interrupted here. In that case, current->mm will be set to mm,
    780	 * because kthread_use_mm() setting ->mm and switching to the mm is
    781	 * done with interrupts off.
    782	 */
    783	if (current->mm == mm)
    784		goto out;
    785
    786	if (current->active_mm == mm) {
    787		WARN_ON_ONCE(current->mm != NULL);
    788		/* Is a kernel thread and is using mm as the lazy tlb */
    789		mmgrab(&init_mm);
    790		current->active_mm = &init_mm;
    791		switch_mm_irqs_off(mm, &init_mm, current);
    792		mmdrop(mm);
    793	}
    794
    795	/*
    796	 * This IPI may be initiated from any source including those not
    797	 * running the mm, so there may be a racing IPI that comes after
    798	 * this one which finds the cpumask already clear. Check and avoid
    799	 * underflowing the active_cpus count in that case. The race should
    800	 * not otherwise be a problem, but the TLB must be flushed because
    801	 * that's what the caller expects.
    802	 */
    803	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
    804		atomic_dec(&mm->context.active_cpus);
    805		cpumask_clear_cpu(cpu, mm_cpumask(mm));
    806		always_flush = true;
    807	}
    808
    809out:
    810	if (always_flush)
    811		_tlbiel_pid(pid, RIC_FLUSH_ALL);
    812}
    813
    814#ifdef CONFIG_SMP
    815static void do_exit_flush_lazy_tlb(void *arg)
    816{
    817	struct mm_struct *mm = arg;
    818	exit_lazy_flush_tlb(mm, true);
    819}
    820
    821static void exit_flush_lazy_tlbs(struct mm_struct *mm)
    822{
    823	/*
    824	 * Would be nice if this was async so it could be run in
    825	 * parallel with our local flush, but generic code does not
    826	 * give a good API for it. Could extend the generic code or
    827	 * make a special powerpc IPI for flushing TLBs.
    828	 * For now it's not too performance critical.
    829	 */
    830	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
    831				(void *)mm, 1);
    832}
    833
    834#else /* CONFIG_SMP */
    835static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
    836#endif /* CONFIG_SMP */
    837
    838static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
    839
    840/*
    841 * Interval between flushes at which we send out IPIs to check whether the
    842 * mm_cpumask can be trimmed for the case where it's not a single-threaded
    843 * process flushing its own mm. The intent is to reduce the cost of later
    844 * flushes. We don't want this to be so low that it adds noticeable cost to TLB
    845 * flushing, or so high that it doesn't help reduce global TLBIEs.
    846 */
    847static unsigned long tlb_mm_cpumask_trim_timer = 1073;
    848
    849static bool tick_and_test_trim_clock(void)
    850{
    851	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
    852			tlb_mm_cpumask_trim_timer) {
    853		__this_cpu_write(mm_cpumask_trim_clock, 0);
    854		return true;
    855	}
    856	return false;
    857}
    858
    859enum tlb_flush_type {
    860	FLUSH_TYPE_NONE,
    861	FLUSH_TYPE_LOCAL,
    862	FLUSH_TYPE_GLOBAL,
    863};
    864
    865static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
    866{
    867	int active_cpus = atomic_read(&mm->context.active_cpus);
    868	int cpu = smp_processor_id();
    869
    870	if (active_cpus == 0)
    871		return FLUSH_TYPE_NONE;
    872	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
    873		if (current->mm != mm) {
    874			/*
    875			 * Asynchronous flush sources may trim down to nothing
    876			 * if the process is not running, so occasionally try
    877			 * to trim.
    878			 */
    879			if (tick_and_test_trim_clock()) {
    880				exit_lazy_flush_tlb(mm, true);
    881				return FLUSH_TYPE_NONE;
    882			}
    883		}
    884		return FLUSH_TYPE_LOCAL;
    885	}
    886
    887	/* Coprocessors require TLBIE to invalidate nMMU. */
    888	if (atomic_read(&mm->context.copros) > 0)
    889		return FLUSH_TYPE_GLOBAL;
    890
    891	/*
    892	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
    893	 * because the mm is being taken down anyway, and a TLBIE tends to
    894	 * be faster than an IPI+TLBIEL.
    895	 */
    896	if (fullmm)
    897		return FLUSH_TYPE_GLOBAL;
    898
    899	/*
    900	 * If we are running the only thread of a single-threaded process,
    901	 * then we should almost always be able to trim off the rest of the
    902	 * CPU mask (except in the case of use_mm() races), so always try
    903	 * trimming the mask.
    904	 */
    905	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
    906		exit_flush_lazy_tlbs(mm);
    907		/*
    908		 * use_mm() race could prevent IPIs from being able to clear
    909		 * the cpumask here, however those users are established
    910		 * after our first check (and so after the PTEs are removed),
    911		 * and the TLB still gets flushed by the IPI, so this CPU
    912		 * will only require a local flush.
    913		 */
    914		return FLUSH_TYPE_LOCAL;
    915	}
    916
    917	/*
    918	 * Occasionally try to trim down the cpumask. It's possible this can
    919	 * bring the mask to zero, which results in no flush.
    920	 */
    921	if (tick_and_test_trim_clock()) {
    922		exit_flush_lazy_tlbs(mm);
    923		if (current->mm == mm)
    924			return FLUSH_TYPE_LOCAL;
    925		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
    926			exit_lazy_flush_tlb(mm, true);
    927		return FLUSH_TYPE_NONE;
    928	}
    929
    930	return FLUSH_TYPE_GLOBAL;
    931}
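
To summarise the heuristic above, here is a simplified standalone model (illustrative names only; it deliberately ignores the occasional cpumask trimming via tick_and_test_trim_clock()):

#include <stdio.h>

enum model_flush_type { MODEL_NONE, MODEL_LOCAL, MODEL_GLOBAL };

static enum model_flush_type model_flush_type_needed(int active_cpus,
		int only_this_cpu_in_mask, int has_copros, int fullmm,
		int single_threaded_and_current)
{
	if (active_cpus == 0)
		return MODEL_NONE;		/* no CPU has used this mm */
	if (active_cpus == 1 && only_this_cpu_in_mask)
		return MODEL_LOCAL;		/* tlbiel on this CPU is enough */
	if (has_copros)
		return MODEL_GLOBAL;		/* nMMU requires tlbie */
	if (fullmm)
		return MODEL_GLOBAL;		/* teardown: tlbie beats IPI + tlbiel */
	if (single_threaded_and_current)
		return MODEL_LOCAL;		/* cpumask gets trimmed to just us */
	return MODEL_GLOBAL;
}

int main(void)
{
	/* Single-threaded process flushing its own mm: expect MODEL_LOCAL (1). */
	printf("%d\n", model_flush_type_needed(2, 0, 0, 0, 1));
	return 0;
}
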
    932
    933#ifdef CONFIG_SMP
    934void radix__flush_tlb_mm(struct mm_struct *mm)
    935{
    936	unsigned long pid;
    937	enum tlb_flush_type type;
    938
    939	pid = mm->context.id;
    940	if (unlikely(pid == MMU_NO_CONTEXT))
    941		return;
    942
    943	preempt_disable();
    944	/*
    945	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
    946	 * stores to clear ptes before the invalidate. See barrier in
    947	 * switch_mm_irqs_off
    948	 */
    949	smp_mb();
    950	type = flush_type_needed(mm, false);
    951	if (type == FLUSH_TYPE_LOCAL) {
    952		_tlbiel_pid(pid, RIC_FLUSH_TLB);
    953	} else if (type == FLUSH_TYPE_GLOBAL) {
    954		if (!mmu_has_feature(MMU_FTR_GTSE)) {
    955			unsigned long tgt = H_RPTI_TARGET_CMMU;
    956
    957			if (atomic_read(&mm->context.copros) > 0)
    958				tgt |= H_RPTI_TARGET_NMMU;
    959			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
    960					       H_RPTI_PAGE_ALL, 0, -1UL);
    961		} else if (cputlb_use_tlbie()) {
    962			if (mm_needs_flush_escalation(mm))
    963				_tlbie_pid(pid, RIC_FLUSH_ALL);
    964			else
    965				_tlbie_pid(pid, RIC_FLUSH_TLB);
    966		} else {
    967			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
    968		}
    969	}
    970	preempt_enable();
    971}
    972EXPORT_SYMBOL(radix__flush_tlb_mm);
    973
    974static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
    975{
    976	unsigned long pid;
    977	enum tlb_flush_type type;
    978
    979	pid = mm->context.id;
    980	if (unlikely(pid == MMU_NO_CONTEXT))
    981		return;
    982
    983	preempt_disable();
    984	smp_mb(); /* see radix__flush_tlb_mm */
    985	type = flush_type_needed(mm, fullmm);
    986	if (type == FLUSH_TYPE_LOCAL) {
    987		_tlbiel_pid(pid, RIC_FLUSH_ALL);
    988	} else if (type == FLUSH_TYPE_GLOBAL) {
    989		if (!mmu_has_feature(MMU_FTR_GTSE)) {
    990			unsigned long tgt = H_RPTI_TARGET_CMMU;
    991			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
    992					     H_RPTI_TYPE_PRT;
    993
    994			if (atomic_read(&mm->context.copros) > 0)
    995				tgt |= H_RPTI_TARGET_NMMU;
    996			pseries_rpt_invalidate(pid, tgt, type,
    997					       H_RPTI_PAGE_ALL, 0, -1UL);
    998		} else if (cputlb_use_tlbie())
    999			_tlbie_pid(pid, RIC_FLUSH_ALL);
   1000		else
   1001			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
   1002	}
   1003	preempt_enable();
   1004}
   1005
   1006void radix__flush_all_mm(struct mm_struct *mm)
   1007{
   1008	__flush_all_mm(mm, false);
   1009}
   1010EXPORT_SYMBOL(radix__flush_all_mm);
   1011
   1012void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
   1013				 int psize)
   1014{
   1015	unsigned long pid;
   1016	enum tlb_flush_type type;
   1017
   1018	pid = mm->context.id;
   1019	if (unlikely(pid == MMU_NO_CONTEXT))
   1020		return;
   1021
   1022	preempt_disable();
   1023	smp_mb(); /* see radix__flush_tlb_mm */
   1024	type = flush_type_needed(mm, false);
   1025	if (type == FLUSH_TYPE_LOCAL) {
   1026		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
   1027	} else if (type == FLUSH_TYPE_GLOBAL) {
   1028		if (!mmu_has_feature(MMU_FTR_GTSE)) {
   1029			unsigned long tgt, pg_sizes, size;
   1030
   1031			tgt = H_RPTI_TARGET_CMMU;
   1032			pg_sizes = psize_to_rpti_pgsize(psize);
   1033			size = 1UL << mmu_psize_to_shift(psize);
   1034
   1035			if (atomic_read(&mm->context.copros) > 0)
   1036				tgt |= H_RPTI_TARGET_NMMU;
   1037			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
   1038					       pg_sizes, vmaddr,
   1039					       vmaddr + size);
   1040		} else if (cputlb_use_tlbie())
   1041			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
   1042		else
   1043			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
   1044	}
   1045	preempt_enable();
   1046}
   1047
   1048void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
   1049{
   1050#ifdef CONFIG_HUGETLB_PAGE
   1051	if (is_vm_hugetlb_page(vma))
   1052		return radix__flush_hugetlb_page(vma, vmaddr);
   1053#endif
   1054	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
   1055}
   1056EXPORT_SYMBOL(radix__flush_tlb_page);
   1057
   1058#endif /* CONFIG_SMP */
   1059
   1060static void do_tlbiel_kernel(void *info)
   1061{
   1062	_tlbiel_pid(0, RIC_FLUSH_ALL);
   1063}
   1064
   1065static inline void _tlbiel_kernel_broadcast(void)
   1066{
   1067	on_each_cpu(do_tlbiel_kernel, NULL, 1);
   1068	if (tlbie_capable) {
   1069		/*
   1070		 * Coherent accelerators don't refcount kernel memory mappings,
   1071		 * so we always have to issue a tlbie for them. This is quite a
   1072		 * slow path anyway.
   1073		 */
   1074		_tlbie_pid(0, RIC_FLUSH_ALL);
   1075	}
   1076}
   1077
   1078/*
   1079 * If kernel TLBIs ever become local rather than global, then
   1080 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
   1081 * assumes kernel TLBIs are global.
   1082 */
   1083void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
   1084{
   1085	if (!mmu_has_feature(MMU_FTR_GTSE)) {
   1086		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
   1087		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
   1088				     H_RPTI_TYPE_PRT;
   1089
   1090		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
   1091				       start, end);
   1092	} else if (cputlb_use_tlbie())
   1093		_tlbie_pid(0, RIC_FLUSH_ALL);
   1094	else
   1095		_tlbiel_kernel_broadcast();
   1096}
   1097EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
   1098
   1099#define TLB_FLUSH_ALL -1UL
   1100
   1101/*
   1102 * Number of pages above which we invalidate the entire PID rather than
   1103 * flush individual pages, for local and global flushes respectively.
   1104 *
   1105 * tlbie goes out to the interconnect and individual ops are more costly.
   1106 * It also does not iterate over sets like the local tlbiel variant when
   1107 * invalidating a full PID, so it has a far lower threshold to change from
   1108 * individual page flushes to full-pid flushes.
   1109 */
   1110static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
   1111static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
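
As a worked example of those two ceilings (a sketch; the 64K base page size is an assumption about a typical radix configuration):

#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 16;		/* assume 64K base pages */
	unsigned long global_ceiling = 33;	/* tlb_single_page_flush_ceiling */
	unsigned long local_ceiling = 128 * 2;	/* POWER9_TLB_SETS_RADIX * 2 */

	/* A global (tlbie) range flush escalates to a full-PID flush once it
	 * covers more than 33 pages (~2MB here); a local (tlbiel) flush only
	 * escalates past 256 pages (16MB), because a full-PID tlbiel has to
	 * walk all 128 sets anyway. */
	printf("global: full-PID flush above %lu KB\n",
	       (global_ceiling << page_shift) >> 10);
	printf("local:  full-PID flush above %lu KB\n",
	       (local_ceiling << page_shift) >> 10);
	return 0;
}
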
   1112
   1113static inline void __radix__flush_tlb_range(struct mm_struct *mm,
   1114					    unsigned long start, unsigned long end)
   1115{
   1116	unsigned long pid;
   1117	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
   1118	unsigned long page_size = 1UL << page_shift;
   1119	unsigned long nr_pages = (end - start) >> page_shift;
   1120	bool fullmm = (end == TLB_FLUSH_ALL);
   1121	bool flush_pid, flush_pwc = false;
   1122	enum tlb_flush_type type;
   1123
   1124	pid = mm->context.id;
   1125	if (unlikely(pid == MMU_NO_CONTEXT))
   1126		return;
   1127
   1128	preempt_disable();
   1129	smp_mb(); /* see radix__flush_tlb_mm */
   1130	type = flush_type_needed(mm, fullmm);
   1131	if (type == FLUSH_TYPE_NONE)
   1132		goto out;
   1133
   1134	if (fullmm)
   1135		flush_pid = true;
   1136	else if (type == FLUSH_TYPE_GLOBAL)
   1137		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
   1138	else
   1139		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
   1140	/*
   1141	 * A full PID flush already does the PWC flush. If this is not a full
   1142	 * PID flush, check whether the range spans more than a PMD and force
   1143	 * a PWC flush; mremap() depends on this behaviour.
   1144	 */
   1145	if (!flush_pid && (end - start) >= PMD_SIZE)
   1146		flush_pwc = true;
   1147
   1148	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
   1149		unsigned long type = H_RPTI_TYPE_TLB;
   1150		unsigned long tgt = H_RPTI_TARGET_CMMU;
   1151		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
   1152
   1153		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
   1154			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
   1155		if (atomic_read(&mm->context.copros) > 0)
   1156			tgt |= H_RPTI_TARGET_NMMU;
   1157		if (flush_pwc)
   1158			type |= H_RPTI_TYPE_PWC;
   1159		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
   1160	} else if (flush_pid) {
   1161		/*
   1162	 * We are now flushing a range larger than PMD size, so force a RIC_FLUSH_ALL
   1163		 */
   1164		if (type == FLUSH_TYPE_LOCAL) {
   1165			_tlbiel_pid(pid, RIC_FLUSH_ALL);
   1166		} else {
   1167			if (cputlb_use_tlbie()) {
   1168				_tlbie_pid(pid, RIC_FLUSH_ALL);
   1169			} else {
   1170				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
   1171			}
   1172		}
   1173	} else {
   1174		bool hflush = false;
   1175		unsigned long hstart, hend;
   1176
   1177		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
   1178			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
   1179			hend = end & PMD_MASK;
   1180			if (hstart < hend)
   1181				hflush = true;
   1182		}
   1183
   1184		if (type == FLUSH_TYPE_LOCAL) {
   1185			asm volatile("ptesync": : :"memory");
   1186			if (flush_pwc)
   1187				/* For PWC, only one flush is needed */
   1188				__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
   1189			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
   1190			if (hflush)
   1191				__tlbiel_va_range(hstart, hend, pid,
   1192						PMD_SIZE, MMU_PAGE_2M);
   1193			ppc_after_tlbiel_barrier();
   1194		} else if (cputlb_use_tlbie()) {
   1195			asm volatile("ptesync": : :"memory");
   1196			if (flush_pwc)
   1197				__tlbie_pid(pid, RIC_FLUSH_PWC);
   1198			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
   1199			if (hflush)
   1200				__tlbie_va_range(hstart, hend, pid,
   1201						PMD_SIZE, MMU_PAGE_2M);
   1202			asm volatile("eieio; tlbsync; ptesync": : :"memory");
   1203		} else {
   1204			_tlbiel_va_range_multicast(mm,
   1205					start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
   1206			if (hflush)
   1207				_tlbiel_va_range_multicast(mm,
   1208					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
   1209		}
   1210	}
   1211out:
   1212	preempt_enable();
   1213}
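
The hstart/hend computation in the THP branch above rounds the range inward to PMD (2M) boundaries, so only huge mappings lying entirely inside [start, end) receive the PMD-sized invalidations. A small standalone sketch of that rounding (the addresses are arbitrary examples):

#include <stdio.h>

#define PMD_SHIFT	21			/* 2MB, matching MMU_PAGE_2M above */
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

int main(void)
{
	unsigned long start = 0x10123000UL, end = 0x10923000UL;

	/* Same inward rounding as __radix__flush_tlb_range(). */
	unsigned long hstart = (start + PMD_SIZE - 1) & PMD_MASK;
	unsigned long hend = end & PMD_MASK;

	if (hstart < hend)
		printf("hflush [0x%lx, 0x%lx)\n", hstart, hend);	/* [0x10200000, 0x10800000) */
	else
		printf("no 2MB block fully covered\n");
	return 0;
}
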
   1214
   1215void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
   1216		     unsigned long end)
   1217
   1218{
   1219#ifdef CONFIG_HUGETLB_PAGE
   1220	if (is_vm_hugetlb_page(vma))
   1221		return radix__flush_hugetlb_tlb_range(vma, start, end);
   1222#endif
   1223
   1224	__radix__flush_tlb_range(vma->vm_mm, start, end);
   1225}
   1226EXPORT_SYMBOL(radix__flush_tlb_range);
   1227
   1228static int radix_get_mmu_psize(int page_size)
   1229{
   1230	int psize;
   1231
   1232	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
   1233		psize = mmu_virtual_psize;
   1234	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
   1235		psize = MMU_PAGE_2M;
   1236	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
   1237		psize = MMU_PAGE_1G;
   1238	else
   1239		return -1;
   1240	return psize;
   1241}
   1242
   1243/*
   1244 * Flush partition scoped LPID address translation for all CPUs.
   1245 */
   1246void radix__flush_tlb_lpid_page(unsigned int lpid,
   1247					unsigned long addr,
   1248					unsigned long page_size)
   1249{
   1250	int psize = radix_get_mmu_psize(page_size);
   1251
   1252	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
   1253}
   1254EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
   1255
   1256/*
   1257 * Flush partition scoped PWC from LPID for all CPUs.
   1258 */
   1259void radix__flush_pwc_lpid(unsigned int lpid)
   1260{
   1261	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
   1262}
   1263EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
   1264
   1265/*
   1266 * Flush partition scoped translations from LPID (=LPIDR)
   1267 */
   1268void radix__flush_all_lpid(unsigned int lpid)
   1269{
   1270	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
   1271}
   1272EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
   1273
   1274/*
   1275 * Flush process scoped translations from LPID (=LPIDR)
   1276 */
   1277void radix__flush_all_lpid_guest(unsigned int lpid)
   1278{
   1279	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
   1280}
   1281
   1282void radix__tlb_flush(struct mmu_gather *tlb)
   1283{
   1284	int psize = 0;
   1285	struct mm_struct *mm = tlb->mm;
   1286	int page_size = tlb->page_size;
   1287	unsigned long start = tlb->start;
   1288	unsigned long end = tlb->end;
   1289
   1290	/*
   1291	 * if page size is not something we understand, do a full mm flush
   1292	 *
   1293	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
   1294	 * that flushes the process table entry cache upon process teardown.
   1295	 * See the comment for radix in arch_exit_mmap().
   1296	 */
   1297	if (tlb->fullmm || tlb->need_flush_all) {
   1298		__flush_all_mm(mm, true);
   1299	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
   1300		if (!tlb->freed_tables)
   1301			radix__flush_tlb_mm(mm);
   1302		else
   1303			radix__flush_all_mm(mm);
   1304	} else {
   1305		if (!tlb->freed_tables)
   1306			radix__flush_tlb_range_psize(mm, start, end, psize);
   1307		else
   1308			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
   1309	}
   1310}
   1311
   1312static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
   1313				unsigned long start, unsigned long end,
   1314				int psize, bool also_pwc)
   1315{
   1316	unsigned long pid;
   1317	unsigned int page_shift = mmu_psize_defs[psize].shift;
   1318	unsigned long page_size = 1UL << page_shift;
   1319	unsigned long nr_pages = (end - start) >> page_shift;
   1320	bool fullmm = (end == TLB_FLUSH_ALL);
   1321	bool flush_pid;
   1322	enum tlb_flush_type type;
   1323
   1324	pid = mm->context.id;
   1325	if (unlikely(pid == MMU_NO_CONTEXT))
   1326		return;
   1327
   1328	fullmm = (end == TLB_FLUSH_ALL);
   1329
   1330	preempt_disable();
   1331	smp_mb(); /* see radix__flush_tlb_mm */
   1332	type = flush_type_needed(mm, fullmm);
   1333	if (type == FLUSH_TYPE_NONE)
   1334		goto out;
   1335
   1336	if (fullmm)
   1337		flush_pid = true;
   1338	else if (type == FLUSH_TYPE_GLOBAL)
   1339		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
   1340	else
   1341		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
   1342
   1343	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
   1344		unsigned long tgt = H_RPTI_TARGET_CMMU;
   1345		unsigned long type = H_RPTI_TYPE_TLB;
   1346		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
   1347
   1348		if (also_pwc)
   1349			type |= H_RPTI_TYPE_PWC;
   1350		if (atomic_read(&mm->context.copros) > 0)
   1351			tgt |= H_RPTI_TARGET_NMMU;
   1352		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
   1353	} else if (flush_pid) {
   1354		if (type == FLUSH_TYPE_LOCAL) {
   1355			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
   1356		} else {
   1357			if (cputlb_use_tlbie()) {
   1358				if (mm_needs_flush_escalation(mm))
   1359					also_pwc = true;
   1360
   1361				_tlbie_pid(pid,
   1362					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
   1363			} else {
   1364				_tlbiel_pid_multicast(mm, pid,
   1365					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
   1366			}
   1367
   1368		}
   1369	} else {
   1370		if (type == FLUSH_TYPE_LOCAL)
   1371			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
   1372		else if (cputlb_use_tlbie())
   1373			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
   1374		else
   1375			_tlbiel_va_range_multicast(mm,
   1376					start, end, pid, page_size, psize, also_pwc);
   1377	}
   1378out:
   1379	preempt_enable();
   1380}
   1381
   1382void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
   1383				  unsigned long end, int psize)
   1384{
   1385	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
   1386}
   1387
   1388void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
   1389				      unsigned long end, int psize)
   1390{
   1391	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
   1392}
   1393
   1394#ifdef CONFIG_TRANSPARENT_HUGEPAGE
   1395void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
   1396{
   1397	unsigned long pid, end;
   1398	enum tlb_flush_type type;
   1399
   1400	pid = mm->context.id;
   1401	if (unlikely(pid == MMU_NO_CONTEXT))
   1402		return;
   1403
   1404	/* 4k page size, just blow the world */
   1405	if (PAGE_SIZE == 0x1000) {
   1406		radix__flush_all_mm(mm);
   1407		return;
   1408	}
   1409
   1410	end = addr + HPAGE_PMD_SIZE;
   1411
   1412	/* Otherwise first do the PWC, then iterate the pages. */
   1413	preempt_disable();
   1414	smp_mb(); /* see radix__flush_tlb_mm */
   1415	type = flush_type_needed(mm, false);
   1416	if (type == FLUSH_TYPE_LOCAL) {
   1417		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
   1418	} else if (type == FLUSH_TYPE_GLOBAL) {
   1419		if (!mmu_has_feature(MMU_FTR_GTSE)) {
   1420			unsigned long tgt, type, pg_sizes;
   1421
   1422			tgt = H_RPTI_TARGET_CMMU;
   1423			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
   1424			       H_RPTI_TYPE_PRT;
   1425			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
   1426
   1427			if (atomic_read(&mm->context.copros) > 0)
   1428				tgt |= H_RPTI_TARGET_NMMU;
   1429			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
   1430					       addr, end);
   1431		} else if (cputlb_use_tlbie())
   1432			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
   1433		else
   1434			_tlbiel_va_range_multicast(mm,
   1435					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
   1436	}
   1437
   1438	preempt_enable();
   1439}
   1440#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
   1441
   1442void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
   1443				unsigned long start, unsigned long end)
   1444{
   1445	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
   1446}
   1447EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
   1448
   1449void radix__flush_tlb_all(void)
   1450{
   1451	unsigned long rb,prs,r,rs;
   1452	unsigned long ric = RIC_FLUSH_ALL;
   1453
   1454	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
   1455	prs = 0; /* partition scoped */
   1456	r = 1;   /* radix format */
   1457	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
   1458
   1459	asm volatile("ptesync": : :"memory");
   1460	/*
   1461	 * now flush guest entries by passing PRS = 1 and LPID != 0
   1462	 */
   1463	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
   1464		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
   1465	/*
   1466	 * now flush host entries by passing PRS = 0 and LPID == 0
   1467	 */
   1468	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
   1469		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
   1470	asm volatile("eieio; tlbsync; ptesync": : :"memory");
   1471}
   1472
   1473#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
   1474/*
   1475 * Performs process-scoped invalidations for a given LPID
   1476 * as part of H_RPT_INVALIDATE hcall.
   1477 */
   1478void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
   1479			     unsigned long type, unsigned long pg_sizes,
   1480			     unsigned long start, unsigned long end)
   1481{
   1482	unsigned long psize, nr_pages;
   1483	struct mmu_psize_def *def;
   1484	bool flush_pid;
   1485
   1486	/*
   1487	 * A H_RPTI_TYPE_ALL request implies RIC=3, hence
   1488	 * do a single IS=1 based flush.
   1489	 */
   1490	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
   1491		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
   1492		return;
   1493	}
   1494
   1495	if (type & H_RPTI_TYPE_PWC)
   1496		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
   1497
   1498	/* Full PID flush */
   1499	if (start == 0 && end == -1)
   1500		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
   1501
   1502	/* Do range invalidation for all the valid page sizes */
   1503	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
   1504		def = &mmu_psize_defs[psize];
   1505		if (!(pg_sizes & def->h_rpt_pgsize))
   1506			continue;
   1507
   1508		nr_pages = (end - start) >> def->shift;
   1509		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
   1510
   1511		/*
   1512		 * If the number of pages spanning the range is above
   1513		 * the ceiling, convert the request into a full PID flush.
   1514		 * And since PID flush takes out all the page sizes, there
   1515		 * is no need to consider remaining page sizes.
   1516		 */
   1517		if (flush_pid) {
   1518			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
   1519			return;
   1520		}
   1521		_tlbie_va_range_lpid(start, end, pid, lpid,
   1522				     (1UL << def->shift), psize, false);
   1523	}
   1524}
   1525EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
   1526
   1527#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
   1528
   1529static int __init create_tlb_single_page_flush_ceiling(void)
   1530{
   1531	debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
   1532			   arch_debugfs_dir, &tlb_single_page_flush_ceiling);
   1533	debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
   1534			   arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
   1535	return 0;
   1536}
   1537late_initcall(create_tlb_single_page_flush_ceiling);
   1538