cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

tlbflush.h (11891B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/tlbflush.h
 *
 * Copyright (C) 1999-2003 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H

#ifndef __ASSEMBLY__

#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>

/*
 * Raw TLBI operations.
 *
 * Where necessary, use the __tlbi() macro to avoid asm()
 * boilerplate. Drivers and most kernel code should use the TLB
 * management routines in preference to the macro below.
 *
 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
 * on whether a particular TLBI operation takes an argument or
 * not. The macro handles invoking the asm with or without the
 * register argument as appropriate.
 */
#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE			       \
			       "tlbi " #op "\n"				       \
		   ALTERNATIVE("nop\n			nop",		       \
			       "dsb ish\n		tlbi " #op,	       \
			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
			    : : )

#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE			       \
			       "tlbi " #op ", %0\n"			       \
		   ALTERNATIVE("nop\n			nop",		       \
			       "dsb ish\n		tlbi " #op ", %0",     \
			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
			    : : "r" (arg))

#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)

#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)
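
/*
 * Illustrative expansions of the dispatch above (not part of the original
 * header): the trailing "1, 0" arguments let __TLBI_N() select __TLBI_0 or
 * __TLBI_1 depending on whether a register operand was supplied.
 *
 *   __tlbi(vmalle1is);     // -> __TLBI_0(vmalle1is, 1): "tlbi vmalle1is"
 *   __tlbi(vae1is, addr);  // -> __TLBI_1(vae1is, addr): "tlbi vae1is, %0" : : "r" (addr)
 */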

#define __tlbi_user(op, arg) do {						\
	if (arm64_kernel_unmapped_at_el0())					\
		__tlbi(op, (arg) | USER_ASID_FLAG);				\
} while (0)

/* This macro creates a properly formatted VA operand for the TLBI */
#define __TLBI_VADDR(addr, asid)				\
	({							\
		unsigned long __ta = (addr) >> 12;		\
		__ta &= GENMASK_ULL(43, 0);			\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})
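
/*
 * Worked example (illustrative, not part of the original header): for
 * addr = 0x40000000 and asid = 1, __TLBI_VADDR() yields
 *
 *   __ta  = (0x40000000 >> 12) & GENMASK_ULL(43, 0) = 0x40000
 *   __ta |= 1UL << 48                               -> 0x0001000000040000
 *
 * i.e. VA bits [55:12] in the low 44 bits and the ASID in bits [63:48].
 */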

/*
 * Get the translation granule of the system, which is decided by
 * PAGE_SIZE.  Used by TTL.
 *  - 4KB	: 1
 *  - 16KB	: 2
 *  - 64KB	: 3
 */
#define TLBI_TTL_TG_4K		1
#define TLBI_TTL_TG_16K		2
#define TLBI_TTL_TG_64K		3

static inline unsigned long get_trans_granule(void)
{
	switch (PAGE_SIZE) {
	case SZ_4K:
		return TLBI_TTL_TG_4K;
	case SZ_16K:
		return TLBI_TTL_TG_16K;
	case SZ_64K:
		return TLBI_TTL_TG_64K;
	default:
		return 0;
	}
}

/*
 * Level-based TLBI operations.
 *
 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
 * the level at which the invalidation must take place. If the level is
 * wrong, no invalidation may take place. In the case where the level
 * cannot be easily determined, a 0 value for the level parameter will
 * perform a non-hinted invalidation.
 *
 * For Stage-2 invalidation, use the level values provided to that effect
 * in asm/stage2_pgtable.h.
 */
#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)

#define __tlbi_level(op, addr, level) do {				\
	u64 arg = addr;							\
									\
	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&		\
	    level) {							\
		u64 ttl = level & 3;					\
		ttl |= get_trans_granule() << 2;			\
		arg &= ~TLBI_TTL_MASK;					\
		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);			\
	}								\
									\
	__tlbi(op, arg);						\
} while (0)
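
/*
 * Illustrative example (not part of the original header): with 4K pages
 * (get_trans_granule() == 1) and a last-level (level 3) hint, the field is
 * ttl = 3 | (1 << 2) = 0b0111, which FIELD_PREP(TLBI_TTL_MASK, ttl) places
 * into bits [47:44] of the TLBI operand.
 */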

#define __tlbi_user_level(op, arg, level) do {				\
	if (arm64_kernel_unmapped_at_el0())				\
		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
} while (0)

/*
 * This macro creates a properly formatted VA operand for the TLB RANGE.
 * The value bit assignments are:
 *
 * +----------+------+-------+-------+-------+----------------------+
 * |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
 * +-----------------+-------+-------+-------+----------------------+
 * |63      48|47  46|45   44|43   39|38   37|36                   0|
 *
 * The address range is determined by the formula below:
 * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
 *
 */
#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
	({							\
		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
		__ta &= GENMASK_ULL(36, 0);			\
		__ta |= (unsigned long)(ttl) << 37;		\
		__ta |= (unsigned long)(num) << 39;		\
		__ta |= (unsigned long)(scale) << 44;		\
		__ta |= get_trans_granule() << 46;		\
		__ta |= (unsigned long)(asid) << 48;		\
		__ta;						\
	})
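
/*
 * Worked example (illustrative, not part of the original header), assuming
 * 4K pages: __TLBI_VADDR_RANGE(0x400000, 1, 1, 7, 0) encodes
 *
 *   BADDR = 0x400000 >> 12 = 0x400, TTL = 0, NUM = 7, SCALE = 1, TG = 1,
 *   ASID = 1  ->  0x0001538000000400
 *
 * which names the range [0x400000, 0x400000 + (7+1) * 2^(5*1+1) * 4K),
 * i.e. 512 pages.
 */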

/* These macros are used by the TLBI RANGE feature. */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)

/*
 * Generate 'num' values from -1 to 30 with -1 rejected by the
 * __flush_tlb_range() loop below.
 */
#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
#define __TLBI_RANGE_NUM(pages, scale)	\
	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
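
/*
 * Worked examples (illustrative, not part of the original header):
 *
 *   MAX_TLBI_RANGE_PAGES = __TLBI_RANGE_PAGES(31, 3) = 32 << 16 = 2^21
 *   pages, i.e. 8GB with 4K pages.
 *
 *   For pages = 512: __TLBI_RANGE_NUM(512, 0) = (256 & 0x1f) - 1 = -1
 *   (rejected), while __TLBI_RANGE_NUM(512, 1) = (8 & 0x1f) - 1 = 7, and
 *   __TLBI_RANGE_PAGES(7, 1) = 8 << 6 = 512 covers the whole run in a
 *   single range operation.
 */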

/*
 *	TLB Invalidation
 *	================
 *
 *	This header file implements the low-level TLB invalidation routines
 *	(sometimes referred to as "flushing" in the kernel) for arm64.
 *
 *	Every invalidation operation uses the following template:
 *
 *	DSB ISHST	// Ensure prior page-table updates have completed
 *	TLBI ...	// Invalidate the TLB
 *	DSB ISH		// Ensure the TLB invalidation has completed
 *	if (invalidated kernel mappings)
 *		ISB	// Discard any instructions fetched from the old mapping
 *
 *
 *	The following functions form part of the "core" TLB invalidation API,
 *	as documented in Documentation/core-api/cachetlb.rst:
 *
 *	flush_tlb_all()
 *		Invalidate the entire TLB (kernel + user) on all CPUs
 *
 *	flush_tlb_mm(mm)
 *		Invalidate an entire user address space on all CPUs.
 *		The 'mm' argument identifies the ASID to invalidate.
 *
 *	flush_tlb_range(vma, start, end)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		Note that this operation also invalidates any walk-cache
 *		entries associated with translations for the specified address
 *		range.
 *
 *	flush_tlb_kernel_range(start, end)
 *		Same as flush_tlb_range(..., start, end), but applies to
 *		kernel mappings rather than a particular user address space.
 *		Whilst not explicitly documented, this function is used when
 *		unmapping pages from vmalloc/io space.
 *
 *	flush_tlb_page(vma, addr)
 *		Invalidate a single user mapping for address 'addr' in the
 *		address space corresponding to 'vma->mm'.  Note that this
 *		operation only invalidates a single, last-level page-table
 *		entry and therefore does not affect any walk-caches.
 *
 *
 *	Next, we have some undocumented invalidation routines that you probably
 *	don't want to call unless you know what you're doing:
 *
 *	local_flush_tlb_all()
 *		Same as flush_tlb_all(), but only applies to the calling CPU.
 *
 *	__flush_tlb_kernel_pgtable(addr)
 *		Invalidate a single kernel mapping for address 'addr' on all
 *		CPUs, ensuring that any walk-cache entries associated with the
 *		translation are also invalidated.
 *
 *	__flush_tlb_range(vma, start, end, stride, last_level)
 *		Invalidate the virtual-address range '[start, end)' on all
 *		CPUs for the user address space corresponding to 'vma->mm'.
 *		The invalidation operations are issued at a granularity
 *		determined by 'stride', and walk-cache entries are only
 *		affected if 'last_level' is false.
 *
 *
 *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
 *	on top of these routines, since that is our interface to the mmu_gather
 *	API as used by munmap() and friends.
 */
static inline void local_flush_tlb_all(void)
{
	dsb(nshst);
	__tlbi(vmalle1);
	dsb(nsh);
	isb();
}

static inline void flush_tlb_all(void)
{
	dsb(ishst);
	__tlbi(vmalle1is);
	dsb(ish);
	isb();
}

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;

	dsb(ishst);
	asid = __TLBI_VADDR(0, ASID(mm));
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
}

static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
					 unsigned long uaddr)
{
	unsigned long addr;

	dsb(ishst);
	addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
}

static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
	flush_tlb_page_nosync(vma, uaddr);
	dsb(ish);
}
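
/*
 * Worked instance of the invalidation template (illustrative, not part of
 * the original header): invalidating one user page, as flush_tlb_page()
 * above does, boils down to
 *
 *   dsb(ishst);
 *   __tlbi(vale1is, __TLBI_VADDR(uaddr, ASID(vma->vm_mm)));
 *   dsb(ish);
 *
 * plus the __tlbi_user() counterpart when KPTI is enabled, and with no
 * trailing isb() because no kernel mappings were touched.
 */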

/*
 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
 * necessarily a performance improvement.
 */
#define MAX_TLBI_OPS	PTRS_PER_PTE

static inline void __flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end,
				     unsigned long stride, bool last_level,
				     int tlb_level)
{
	int num = 0;
	int scale = 0;
	unsigned long asid, addr, pages;

	start = round_down(start, stride);
	end = round_up(end, stride);
	pages = (end - start) >> PAGE_SHIFT;

	/*
	 * When the CPU does not support TLB range ops, we can handle up to
	 * (MAX_TLBI_OPS - 1) pages; when it does, we can handle up to
	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
	    pages >= MAX_TLBI_RANGE_PAGES) {
		flush_tlb_mm(vma->vm_mm);
		return;
	}

	dsb(ishst);
	asid = ASID(vma->vm_mm);

	/*
	 * When the CPU does not support TLB range operations, flush the TLB
	 * entries one by one at the granularity of 'stride'. If the TLB
	 * range ops are supported, then:
	 *
	 * 1. If 'pages' is odd, flush the first page through non-range
	 *    operations;
	 *
	 * 2. For remaining pages: the minimum range granularity is decided
	 *    by 'scale', so multiple range TLBI operations may be required.
	 *    Start from scale = 0, flush the corresponding number of pages
	 *    ((num+1)*2^(5*scale+1), starting from 'addr'), then increase
	 *    'scale' until no pages are left.
	 *
	 * Note that certain ranges can be represented by either num = 31 and
	 * scale or num = 0 and scale + 1. The loop below favours the latter
	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
	 */
	while (pages > 0) {
		if (!system_supports_tlb_range() ||
		    pages % 2 == 1) {
			addr = __TLBI_VADDR(start, asid);
			if (last_level) {
				__tlbi_level(vale1is, addr, tlb_level);
				__tlbi_user_level(vale1is, addr, tlb_level);
			} else {
				__tlbi_level(vae1is, addr, tlb_level);
				__tlbi_user_level(vae1is, addr, tlb_level);
			}
			start += stride;
			pages -= stride >> PAGE_SHIFT;
			continue;
		}

		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			addr = __TLBI_VADDR_RANGE(start, asid, scale,
						  num, tlb_level);
			if (last_level) {
				__tlbi(rvale1is, addr);
				__tlbi_user(rvale1is, addr);
			} else {
				__tlbi(rvae1is, addr);
				__tlbi_user(rvae1is, addr);
			}
			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
			pages -= __TLBI_RANGE_PAGES(num, scale);
		}
		scale++;
	}
	dsb(ish);
}
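
/*
 * Worked trace of the loop above (illustrative, not part of the original
 * header), assuming 4K pages, stride == PAGE_SIZE and TLB range ops
 * supported: for a 13-page range, the odd leading page is flushed with a
 * single vae1is/vale1is operation (leaving pages = 12), then scale = 0
 * gives num = __TLBI_RANGE_NUM(12, 0) = 5, so one rvae1is/rvale1is range
 * operation covers the remaining __TLBI_RANGE_PAGES(5, 0) = 12 pages.
 */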

static inline void flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	/*
	 * We cannot use leaf-only invalidation here, since we may be
	 * invalidating table entries as part of collapsing hugepages or
	 * moving page tables. Set tlb_level to 0 because the level cannot
	 * be determined here.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
}

static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long addr;

	if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
		flush_tlb_all();
		return;
	}

	start = __TLBI_VADDR(start, 0);
	end = __TLBI_VADDR(end, 0);

	dsb(ishst);
	/* The operand encodes VA >> 12, so one PAGE_SIZE step is 1 << (PAGE_SHIFT - 12) */
	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
		__tlbi(vaale1is, addr);
	dsb(ish);
	isb();
}

/*
 * Used to invalidate the TLB (walk caches) corresponding to intermediate page
 * table levels (pgd/pud/pmd).
 */
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
{
	unsigned long addr = __TLBI_VADDR(kaddr, 0);

	dsb(ishst);
	__tlbi(vaae1is, addr);
	dsb(ish);
	isb();
}
#endif

#endif