cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pgtable.h (43029B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
      3#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
      4
      5#include <asm-generic/pgtable-nop4d.h>
      6
      7#ifndef __ASSEMBLY__
      8#include <linux/mmdebug.h>
      9#include <linux/bug.h>
     10#include <linux/sizes.h>
     11#endif
     12
     13/*
     14 * Common bits between the hash and radix page tables
     15 */
     16
     17#define _PAGE_EXEC		0x00001 /* execute permission */
     18#define _PAGE_WRITE		0x00002 /* write access allowed */
     19#define _PAGE_READ		0x00004	/* read access allowed */
     20#define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
     21#define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
     22#define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
     23#define _PAGE_SAO		0x00010 /* Strong access order */
     24#define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
     25#define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
     26#define _PAGE_DIRTY		0x00080 /* C: page changed */
     27#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
     28/*
     29 * Software bits
     30 */
     31#define _RPAGE_SW0		0x2000000000000000UL
     32#define _RPAGE_SW1		0x00800
     33#define _RPAGE_SW2		0x00400
     34#define _RPAGE_SW3		0x00200
     35#define _RPAGE_RSV1		0x00040UL
     36
     37#define _RPAGE_PKEY_BIT4	0x1000000000000000UL
     38#define _RPAGE_PKEY_BIT3	0x0800000000000000UL
     39#define _RPAGE_PKEY_BIT2	0x0400000000000000UL
     40#define _RPAGE_PKEY_BIT1	0x0200000000000000UL
     41#define _RPAGE_PKEY_BIT0	0x0100000000000000UL
     42
     43#define _PAGE_PTE		0x4000000000000000UL	/* distinguishes PTEs from pointers */
     44#define _PAGE_PRESENT		0x8000000000000000UL	/* pte contains a translation */
     45/*
     46 * We need to mark a pmd pte invalid while splitting. We can do that by clearing
     47 * the _PAGE_PRESENT bit. But then that will be taken as a swap pte. In order to
     48 * differentiate between the two, we use a SW field when invalidating.
     49 *
     50 * We also do this temporary invalidation for regular pte entries in ptep_set_access_flags.
     51 *
     52 * This is used only when _PAGE_PRESENT is cleared.
     53 */
     54#define _PAGE_INVALID		_RPAGE_SW0
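/*
 * Illustrative sketch of such an invalidated entry (an informal reading of
 * the bits, assuming a THP pmd that pmdp_invalidate() has just marked):
 *
 *	_PAGE_PTE | _PAGE_INVALID | <remaining old bits>, _PAGE_PRESENT clear
 *
 * pmd_present()/pte_present() below still treat this as present, while a
 * swap pte (which has neither _PAGE_PRESENT nor _PAGE_INVALID set) is not
 * confused with it.
 */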
     55
     56/*
     57 * Top and bottom bits of RPN which can be used by hash
     58 * translation mode, because we expect them to be zero
     59 * otherwise.
     60 */
     61#define _RPAGE_RPN0		0x01000
     62#define _RPAGE_RPN1		0x02000
     63#define _RPAGE_RPN43		0x0080000000000000UL
     64#define _RPAGE_RPN42		0x0040000000000000UL
     65#define _RPAGE_RPN41		0x0020000000000000UL
     66
     67/* Max physical address bit as per radix table */
     68#define _RPAGE_PA_MAX		56
     69
     70/*
     71 * Max physical address bit we will use for now.
     72 *
     73 * This is mostly a hardware limitation and for now Power9 has
     74 * a 51 bit limit.
     75 *
     76 * This is different from the number of physical bit required to address
     77 * the last byte of memory. That is defined by MAX_PHYSMEM_BITS.
     78 * MAX_PHYSMEM_BITS is a linux limitation imposed by the maximum
     79 * number of sections we can support (SECTIONS_SHIFT).
     80 *
     81 * This is different from Radix page table limitation above and
     82 * should always be less than that. The limit is chosen so that
     83 * we can overload the bits between _RPAGE_PA_MAX and _PAGE_PA_MAX
     84 * for hash linux page table specific bits.
     85 *
     86 * In order to be compatible with future hardware generations we keep
     87 * some headroom and limit this to 53 for now.
     88 */
     89#define _PAGE_PA_MAX		53
     90
     91#define _PAGE_SOFT_DIRTY	_RPAGE_SW3 /* software: software dirty tracking */
     92#define _PAGE_SPECIAL		_RPAGE_SW2 /* software: special page */
     93#define _PAGE_DEVMAP		_RPAGE_SW1 /* software: ZONE_DEVICE page */
     94
     95/*
     96 * Drivers request cache-inhibited pte mappings using _PAGE_NO_CACHE.
     97 * Instead of fixing all of them, add an alternate define which
     98 * maps to a CI pte mapping.
     99 */
    100#define _PAGE_NO_CACHE		_PAGE_TOLERANT
    101/*
    102 * We support a _RPAGE_PA_MAX-bit real address in the pte. On the linux side
    103 * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
    104 * and everything below PAGE_SHIFT.
    105 */
    106#define PTE_RPN_MASK	(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
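/*
 * Worked example of the mask above, assuming 64K pages (PAGE_SHIFT == 16 is
 * an assumption for illustration, not something this header fixes):
 *
 *	(1UL << 53) - 1		= 0x001fffffffffffff	(bits 0..52)
 *	... & PAGE_MASK		= 0x001fffffffff0000	(bits 16..52)
 *
 * i.e. the real page number occupies bits PAGE_SHIFT.._PAGE_PA_MAX-1.
 */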
    107/*
    108 * Set of bits not changed in pmd_modify. Even though we have hash-specific bits
    109 * in here, on radix we expect them to be zero.
    110 */
    111#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
    112			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
    113			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
    114/*
    115 * user access blocked by key
    116 */
    117#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
    118#define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
    119#define _PAGE_KERNEL_ROX	 (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
    120#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY |	\
    121				 _PAGE_RW | _PAGE_EXEC)
    122/*
    123 * _PAGE_CHG_MASK is the mask of bits to be preserved across
    124 * pgprot changes.
    125 */
    126#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
    127			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |	\
    128			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
    129
    130/*
    131 * We define 2 sets of base prot bits, one for basic pages (ie,
    132 * cacheable kernel and user pages) and one for non cacheable
    133 * pages. We always set _PAGE_COHERENT when SMP is enabled or
    134 * the processor might need it for DMA coherency.
    135 */
    136#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
    137#define _PAGE_BASE	(_PAGE_BASE_NC)
    138
    139/* Permission masks used to generate the __P and __S table,
    140 *
    141 * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
    142 *
    143 * Write permissions imply read permissions for now (we could make write-only
    144 * pages on BookE but we don't bother for now). Execute permission control is
    145 * possible on platforms that define _PAGE_EXEC
    146 */
    147#define PAGE_NONE	__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
    148#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_RW)
    149#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
    150#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_READ)
    151#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
    152#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
    153#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
    154
    155/* Permission masks used for kernel mappings */
    156#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
    157#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
    158				 _PAGE_TOLERANT)
    159#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
    160				 _PAGE_NON_IDEMPOTENT)
    161#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
    162#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
    163#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
    164
    165/*
    166 * Protection used for kernel text. We want the debuggers to be able to
    167 * set breakpoints anywhere, so don't write protect the kernel text
    168 * on platforms where such control is possible.
    169 */
    170#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
    171	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
    172#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
    173#else
    174#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
    175#endif
    176
    177/* Make module code happy. We don't set RO yet */
    178#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
    179#define PAGE_AGP		(PAGE_KERNEL_NC)
    180
    181#ifndef __ASSEMBLY__
    182/*
    183 * page table defines
    184 */
    185extern unsigned long __pte_index_size;
    186extern unsigned long __pmd_index_size;
    187extern unsigned long __pud_index_size;
    188extern unsigned long __pgd_index_size;
    189extern unsigned long __pud_cache_index;
    190#define PTE_INDEX_SIZE  __pte_index_size
    191#define PMD_INDEX_SIZE  __pmd_index_size
    192#define PUD_INDEX_SIZE  __pud_index_size
    193#define PGD_INDEX_SIZE  __pgd_index_size
    194/* pmd table use page table fragments */
    195#define PMD_CACHE_INDEX  0
    196#define PUD_CACHE_INDEX __pud_cache_index
    197/*
    198 * Because of the use of pte fragments and THP, the sizes of the page tables
    199 * are not always derived from the index sizes above.
    200 */
    201extern unsigned long __pte_table_size;
    202extern unsigned long __pmd_table_size;
    203extern unsigned long __pud_table_size;
    204extern unsigned long __pgd_table_size;
    205#define PTE_TABLE_SIZE	__pte_table_size
    206#define PMD_TABLE_SIZE	__pmd_table_size
    207#define PUD_TABLE_SIZE	__pud_table_size
    208#define PGD_TABLE_SIZE	__pgd_table_size
    209
    210extern unsigned long __pmd_val_bits;
    211extern unsigned long __pud_val_bits;
    212extern unsigned long __pgd_val_bits;
    213#define PMD_VAL_BITS	__pmd_val_bits
    214#define PUD_VAL_BITS	__pud_val_bits
    215#define PGD_VAL_BITS	__pgd_val_bits
    216
    217extern unsigned long __pte_frag_nr;
    218#define PTE_FRAG_NR __pte_frag_nr
    219extern unsigned long __pte_frag_size_shift;
    220#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
    221#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
    222
    223extern unsigned long __pmd_frag_nr;
    224#define PMD_FRAG_NR __pmd_frag_nr
    225extern unsigned long __pmd_frag_size_shift;
    226#define PMD_FRAG_SIZE_SHIFT __pmd_frag_size_shift
    227#define PMD_FRAG_SIZE (1UL << PMD_FRAG_SIZE_SHIFT)
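/*
 * Relationship sketch (an assumption based on how the hash/radix headers
 * typically set these up, not something enforced here): one PAGE_SIZE page of
 * page-table memory is carved into PTE_FRAG_NR (resp. PMD_FRAG_NR) fragments
 * of PTE_FRAG_SIZE (resp. PMD_FRAG_SIZE) bytes, roughly
 *
 *	PTE_FRAG_NR == PAGE_SIZE >> PTE_FRAG_SIZE_SHIFT
 *	PMD_FRAG_NR == PAGE_SIZE >> PMD_FRAG_SIZE_SHIFT
 *
 * which lets several small tables share a single backing page.
 */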
    228
    229#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
    230#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
    231#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
    232#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
    233
    234#define MAX_PTRS_PER_PTE ((H_PTRS_PER_PTE > R_PTRS_PER_PTE) ? H_PTRS_PER_PTE : R_PTRS_PER_PTE)
    235#define MAX_PTRS_PER_PMD ((H_PTRS_PER_PMD > R_PTRS_PER_PMD) ? H_PTRS_PER_PMD : R_PTRS_PER_PMD)
    236#define MAX_PTRS_PER_PUD ((H_PTRS_PER_PUD > R_PTRS_PER_PUD) ? H_PTRS_PER_PUD : R_PTRS_PER_PUD)
    237#define MAX_PTRS_PER_PGD	(1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
    238				       H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
    239
    240/* PMD_SHIFT determines what a second-level page table entry can map */
    241#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
    242#define PMD_SIZE	(1UL << PMD_SHIFT)
    243#define PMD_MASK	(~(PMD_SIZE-1))
    244
    245/* PUD_SHIFT determines what a third-level page table entry can map */
    246#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
    247#define PUD_SIZE	(1UL << PUD_SHIFT)
    248#define PUD_MASK	(~(PUD_SIZE-1))
    249
    250/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
    251#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
    252#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
    253#define PGDIR_MASK	(~(PGDIR_SIZE-1))
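/*
 * Worked example of the geometry above, assuming radix with 64K pages and
 * index sizes of 5/9/9/13 (an assumption for illustration; the real values
 * are filled in at boot from the radix/hash headers):
 *
 *	PMD_SHIFT   = 16 + 5 = 21	-> one pmd entry maps 2MB
 *	PUD_SHIFT   = 21 + 9 = 30	-> one pud entry maps 1GB
 *	PGDIR_SHIFT = 30 + 9 = 39	-> one pgd entry maps 512GB
 */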
    254
    255/* Bits to mask out from a PMD to get to the PTE page */
    256#define PMD_MASKED_BITS		0xc0000000000000ffUL
    257/* Bits to mask out from a PUD to get to the PMD page */
    258#define PUD_MASKED_BITS		0xc0000000000000ffUL
    259/* Bits to mask out from a PGD to get to the PUD page */
    260#define P4D_MASKED_BITS		0xc0000000000000ffUL
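/*
 * Reading of the masks above (illustrative): bits 63..62 carry
 * _PAGE_PRESENT/_PAGE_PTE style flags and the low byte carries software bits,
 * while the physical address of the next-level table sits in between.
 * pud_pgtable()/p4d_pgtable() further down clear exactly these bits before
 * handing the remainder to __va().
 */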
    261
    262/*
    263 * Used as an indicator for rcu callback functions
    264 */
    265enum pgtable_index {
    266	PTE_INDEX = 0,
    267	PMD_INDEX,
    268	PUD_INDEX,
    269	PGD_INDEX,
    270	/*
    271	 * Below are used with 4k page size and hugetlb
    272	 */
    273	HTLB_16M_INDEX,
    274	HTLB_16G_INDEX,
    275};
    276
    277extern unsigned long __vmalloc_start;
    278extern unsigned long __vmalloc_end;
    279#define VMALLOC_START	__vmalloc_start
    280#define VMALLOC_END	__vmalloc_end
    281
    282static inline unsigned int ioremap_max_order(void)
    283{
    284	if (radix_enabled())
    285		return PUD_SHIFT;
    286	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
    287}
    288#define IOREMAP_MAX_ORDER ioremap_max_order()
    289
    290extern unsigned long __kernel_virt_start;
    291extern unsigned long __kernel_io_start;
    292extern unsigned long __kernel_io_end;
    293#define KERN_VIRT_START __kernel_virt_start
    294#define KERN_IO_START  __kernel_io_start
    295#define KERN_IO_END __kernel_io_end
    296
    297extern struct page *vmemmap;
    298extern unsigned long pci_io_base;
    299#endif /* __ASSEMBLY__ */
    300
    301#include <asm/book3s/64/hash.h>
    302#include <asm/book3s/64/radix.h>
    303
    304#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
    305#define  MAX_PHYSMEM_BITS	H_MAX_PHYSMEM_BITS
    306#else
    307#define  MAX_PHYSMEM_BITS	R_MAX_PHYSMEM_BITS
    308#endif
    309
    310
    311#ifdef CONFIG_PPC_64K_PAGES
    312#include <asm/book3s/64/pgtable-64k.h>
    313#else
    314#include <asm/book3s/64/pgtable-4k.h>
    315#endif
    316
    317#include <asm/barrier.h>
    318/*
    319 * IO space itself is carved into the PIO region (ISA and PHB IO space) and
    320 * the ioremap space
    321 *
    322 *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
    323 *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
    324 * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
    325 */
    326#define FULL_IO_SIZE	0x80000000ul
    327#define  ISA_IO_BASE	(KERN_IO_START)
    328#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
    329#define  PHB_IO_BASE	(ISA_IO_END)
    330#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
    331#define IOREMAP_BASE	(PHB_IO_END)
    332#define IOREMAP_START	(ioremap_bot)
    333#define IOREMAP_END	(KERN_IO_END - FIXADDR_SIZE)
    334#define FIXADDR_SIZE	SZ_32M
    335
    336/* Advertise special mapping type for AGP */
    337#define HAVE_PAGE_AGP
    338
    339#ifndef __ASSEMBLY__
    340
    341/*
    342 * This is the default implementation of various PTE accessors; it's
    343 * used in all cases except Book3S with 64K pages where we have a
    344 * concept of sub-pages
    345 */
    346#ifndef __real_pte
    347
    348#define __real_pte(e, p, o)		((real_pte_t){(e)})
    349#define __rpte_to_pte(r)	((r).pte)
    350#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
    351
    352#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
    353	do {							         \
    354		index = 0;					         \
    355		shift = mmu_psize_defs[psize].shift;		         \
    356
    357#define pte_iterate_hashed_end() } while(0)
    358
    359/*
    360 * We expect this to be called only for user addresses or kernel virtual
    361 * addresses other than the linear mapping.
    362 */
    363#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
    364
    365#endif /* __real_pte */
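/*
 * Usage sketch for the iterator above (hedged; it mirrors how the hash
 * flush/fault paths use it -- hpt_hash(), ssize and friends are context from
 * those callers, not from this header):
 *
 *	pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) {
 *		hash = hpt_hash(vpn, shift, ssize);
 *		hidx = __rpte_to_hidx(rpte, index);
 *		...
 *	} pte_iterate_hashed_end();
 *
 * With this default (no sub-page) implementation the body runs exactly once,
 * with index == 0 and shift taken from mmu_psize_defs[psize].
 */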
    366
    367static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
    368				       pte_t *ptep, unsigned long clr,
    369				       unsigned long set, int huge)
    370{
    371	if (radix_enabled())
    372		return radix__pte_update(mm, addr, ptep, clr, set, huge);
    373	return hash__pte_update(mm, addr, ptep, clr, set, huge);
    374}
    375/*
    376 * For hash, even if we have _PAGE_ACCESSED = 0, we do a pte_update.
    377 * We currently remove entries from the hashtable regardless of whether
    378 * the entry was young or dirty.
    379 *
    380 * We should be more intelligent about this but for the moment we override
    381 * these functions and force a tlb flush unconditionally.
    382 * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
    383 * function for both hash and radix.
    384 */
    385static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
    386					      unsigned long addr, pte_t *ptep)
    387{
    388	unsigned long old;
    389
    390	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
    391		return 0;
    392	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
    393	return (old & _PAGE_ACCESSED) != 0;
    394}
    395
    396#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
    397#define ptep_test_and_clear_young(__vma, __addr, __ptep)	\
    398({								\
    399	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
    400})
    401
    402/*
    403 * On Book3S CPUs, clearing the accessed bit without a TLB flush
    404 * doesn't cause data corruption. [ It could cause incorrect
    405 * page aging and the (mistaken) reclaim of hot pages, but the
    406 * chance of that should be relatively low. ]
    407 *
    408 * So as a performance optimization don't flush the TLB when
    409 * clearing the accessed bit, it will eventually be flushed by
    410 * a context switch or a VM operation anyway. [ In the rare
    411 * event of it not getting flushed for a long time the delay
    412 * shouldn't really matter because there's no real memory
    413 * pressure for swapout to react to. ]
    414 */
    415#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
    416#define ptep_clear_flush_young ptep_test_and_clear_young
    417
    418#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
    419#define pmdp_clear_flush_young pmdp_test_and_clear_young
    420
    421static inline int __pte_write(pte_t pte)
    422{
    423	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
    424}
    425
    426#ifdef CONFIG_NUMA_BALANCING
    427#define pte_savedwrite pte_savedwrite
    428static inline bool pte_savedwrite(pte_t pte)
    429{
    430	/*
    431	 * Saved write ptes are prot none ptes that don't have the
    432	 * privileged bit set. We mark prot none as one which has
    433	 * present and privileged bit set and RWX cleared. To mark
    434	 * protnone which used to have _PAGE_WRITE set we clear
    435	 * the privileged bit.
    436	 */
    437	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
    438}
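/*
 * Encoding sketch (illustrative): an ordinary PROT_NONE pte is
 * _PAGE_PRESENT | _PAGE_PRIVILEGED with RWX clear; a saved-write PROT_NONE
 * pte additionally has _PAGE_PRIVILEGED cleared (see pte_mk_savedwrite()
 * below), which is exactly the combination tested for here.
 */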
    439#else
    440#define pte_savedwrite pte_savedwrite
    441static inline bool pte_savedwrite(pte_t pte)
    442{
    443	return false;
    444}
    445#endif
    446
    447static inline int pte_write(pte_t pte)
    448{
    449	return __pte_write(pte) || pte_savedwrite(pte);
    450}
    451
    452static inline int pte_read(pte_t pte)
    453{
    454	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ));
    455}
    456
    457#define __HAVE_ARCH_PTEP_SET_WRPROTECT
    458static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
    459				      pte_t *ptep)
    460{
    461	if (__pte_write(*ptep))
    462		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
    463	else if (unlikely(pte_savedwrite(*ptep)))
    464		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
    465}
    466
    467#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
    468static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
    469					   unsigned long addr, pte_t *ptep)
    470{
    471	/*
    472	 * We should not find protnone for hugetlb, but this completes the
    473	 * interface.
    474	 */
    475	if (__pte_write(*ptep))
    476		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
    477	else if (unlikely(pte_savedwrite(*ptep)))
    478		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
    479}
    480
    481#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
    482static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
    483				       unsigned long addr, pte_t *ptep)
    484{
    485	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
    486	return __pte(old);
    487}
    488
    489#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
    490static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
    491					    unsigned long addr,
    492					    pte_t *ptep, int full)
    493{
    494	if (full && radix_enabled()) {
    495		/*
    496		 * We know that this is a full mm pte clear and
    497		 * hence can be sure there is no parallel set_pte.
    498		 */
    499		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
    500	}
    501	return ptep_get_and_clear(mm, addr, ptep);
    502}
    503
    504
    505static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
    506			     pte_t * ptep)
    507{
    508	pte_update(mm, addr, ptep, ~0UL, 0, 0);
    509}
    510
    511static inline int pte_dirty(pte_t pte)
    512{
    513	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY));
    514}
    515
    516static inline int pte_young(pte_t pte)
    517{
    518	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_ACCESSED));
    519}
    520
    521static inline int pte_special(pte_t pte)
    522{
    523	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
    524}
    525
    526static inline bool pte_exec(pte_t pte)
    527{
    528	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
    529}
    530
    531
    532#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
    533static inline bool pte_soft_dirty(pte_t pte)
    534{
    535	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SOFT_DIRTY));
    536}
    537
    538static inline pte_t pte_mksoft_dirty(pte_t pte)
    539{
    540	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
    541}
    542
    543static inline pte_t pte_clear_soft_dirty(pte_t pte)
    544{
    545	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
    546}
    547#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
    548
    549#ifdef CONFIG_NUMA_BALANCING
    550static inline int pte_protnone(pte_t pte)
    551{
    552	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
    553		cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
    554}
    555
    556#define pte_mk_savedwrite pte_mk_savedwrite
    557static inline pte_t pte_mk_savedwrite(pte_t pte)
    558{
    559	/*
    560	 * Used by Autonuma subsystem to preserve the write bit
    561	 * while marking the pte PROT_NONE. Only allow this
    562	 * on PROT_NONE ptes.
    563	 */
    564	VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
    565		  cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
    566	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
    567}
    568
    569#define pte_clear_savedwrite pte_clear_savedwrite
    570static inline pte_t pte_clear_savedwrite(pte_t pte)
    571{
    572	/*
    573	 * Used by KSM subsystem to make a protnone pte readonly.
    574	 */
    575	VM_BUG_ON(!pte_protnone(pte));
    576	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
    577}
    578#else
    579#define pte_clear_savedwrite pte_clear_savedwrite
    580static inline pte_t pte_clear_savedwrite(pte_t pte)
    581{
    582	VM_WARN_ON(1);
    583	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
    584}
    585#endif /* CONFIG_NUMA_BALANCING */
    586
    587static inline bool pte_hw_valid(pte_t pte)
    588{
    589	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE)) ==
    590		cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
    591}
    592
    593static inline int pte_present(pte_t pte)
    594{
    595	/*
    596	 * A pte is considered present if _PAGE_PRESENT is set.
    597	 * We also need to treat as present a pte that is marked
    598	 * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID
    599	 * if we find _PAGE_PRESENT cleared.
    600	 */
    601
    602	if (pte_hw_valid(pte))
    603		return true;
    604	return (pte_raw(pte) & cpu_to_be64(_PAGE_INVALID | _PAGE_PTE)) ==
    605		cpu_to_be64(_PAGE_INVALID | _PAGE_PTE);
    606}
    607
    608#ifdef CONFIG_PPC_MEM_KEYS
    609extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
    610#else
    611static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
    612{
    613	return true;
    614}
    615#endif /* CONFIG_PPC_MEM_KEYS */
    616
    617static inline bool pte_user(pte_t pte)
    618{
    619	return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
    620}
    621
    622#define pte_access_permitted pte_access_permitted
    623static inline bool pte_access_permitted(pte_t pte, bool write)
    624{
    625	/*
    626	 * _PAGE_READ is needed for any access and will be
    627	 * cleared for PROT_NONE
    628	 */
    629	if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
    630		return false;
    631
    632	if (write && !pte_write(pte))
    633		return false;
    634
    635	return arch_pte_access_permitted(pte_val(pte), write, 0);
    636}
    637
    638/*
    639 * Conversion functions: convert a page and protection to a page entry,
    640 * and a page entry and page directory to the page they refer to.
    641 *
    642 * Even if PTEs can be unsigned long long, a PFN is always an unsigned
    643 * long for now.
    644 */
    645static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
    646{
    647	VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
    648	VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
    649
    650	return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
    651}
    652
    653static inline unsigned long pte_pfn(pte_t pte)
    654{
    655	return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT;
    656}
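/*
 * Round-trip sketch (values purely illustrative, assuming 64K pages):
 *
 *	pte_t pte = pfn_pte(0x12345, PAGE_KERNEL);
 *	pte_pfn(pte) == 0x12345
 *
 * pfn_pte() shifts the pfn up by PAGE_SHIFT into PTE_RPN_MASK and always ORs
 * in _PAGE_PTE; pte_pfn() masks and shifts it back out.
 */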
    657
    658/* Generic modifiers for PTE bits */
    659static inline pte_t pte_wrprotect(pte_t pte)
    660{
    661	if (unlikely(pte_savedwrite(pte)))
    662		return pte_clear_savedwrite(pte);
    663	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
    664}
    665
    666static inline pte_t pte_exprotect(pte_t pte)
    667{
    668	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
    669}
    670
    671static inline pte_t pte_mkclean(pte_t pte)
    672{
    673	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
    674}
    675
    676static inline pte_t pte_mkold(pte_t pte)
    677{
    678	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
    679}
    680
    681static inline pte_t pte_mkexec(pte_t pte)
    682{
    683	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
    684}
    685
    686static inline pte_t pte_mkwrite(pte_t pte)
    687{
    688	/*
    689	 * write implies read, hence set both
    690	 */
    691	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
    692}
    693
    694static inline pte_t pte_mkdirty(pte_t pte)
    695{
    696	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
    697}
    698
    699static inline pte_t pte_mkyoung(pte_t pte)
    700{
    701	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
    702}
    703
    704static inline pte_t pte_mkspecial(pte_t pte)
    705{
    706	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
    707}
    708
    709static inline pte_t pte_mkhuge(pte_t pte)
    710{
    711	return pte;
    712}
    713
    714static inline pte_t pte_mkdevmap(pte_t pte)
    715{
    716	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
    717}
    718
    719static inline pte_t pte_mkprivileged(pte_t pte)
    720{
    721	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
    722}
    723
    724static inline pte_t pte_mkuser(pte_t pte)
    725{
    726	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
    727}
    728
    729/*
    730 * This is potentially called with a pmd as the argument, in which case it's not
    731 * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set.
    732 * That's because the bit we use for _PAGE_DEVMAP is not reserved for software
    733 * use in page directory entries (ie. non-ptes).
    734 */
    735static inline int pte_devmap(pte_t pte)
    736{
    737	u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE);
    738
    739	return (pte_raw(pte) & mask) == mask;
    740}
    741
    742static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
    743{
    744	/* FIXME!! check whether this needs to be a conditional */
    745	return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
    746			 cpu_to_be64(pgprot_val(newprot)));
    747}
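/*
 * Behaviour sketch: pte_modify(pte, PAGE_READONLY) keeps everything covered
 * by _PAGE_CHG_MASK (the RPN, _PAGE_DIRTY, _PAGE_ACCESSED, _PAGE_SPECIAL,
 * _PAGE_SOFT_DIRTY, _PAGE_DEVMAP, _PAGE_PTE and the hash flags) and takes the
 * remaining protection bits from the new pgprot, so a writable pte becomes
 * read-only without losing its dirty/accessed state.
 */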
    748
    749/* Encode and de-code a swap entry */
    750#define MAX_SWAPFILES_CHECK() do { \
    751	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
    752	/*							\
    753	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
    754	 * We filter HPTEFLAGS on set_pte.			\
    755	 */							\
    756	BUILD_BUG_ON(_PAGE_HPTEFLAGS & SWP_TYPE_MASK); \
    757	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
    758	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_EXCLUSIVE);	\
    759	} while (0)
    760
    761#define SWP_TYPE_BITS 5
    762#define SWP_TYPE_MASK		((1UL << SWP_TYPE_BITS) - 1)
    763#define __swp_type(x)		((x).val & SWP_TYPE_MASK)
    764#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
    765#define __swp_entry(type, offset)	((swp_entry_t) { \
    766				(type) | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
    767/*
    768 * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
    769 * the swap type and offset we get from swap, and convert that to a pte to find a
    770 * matching pte in the linux page table.
    771 * Clear bits not found in swap entries here.
    772 */
    773#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
    774#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
    775#define __pmd_to_swp_entry(pmd)	(__pte_to_swp_entry(pmd_pte(pmd)))
    776#define __swp_entry_to_pmd(x)	(pte_pmd(__swp_entry_to_pte(x)))
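/*
 * Layout sketch of a swap pte here (an informal reading of the macros above):
 * the swap type lives in the low SWP_TYPE_BITS bits, the swap offset is
 * shifted up by PAGE_SHIFT into the PTE_RPN_MASK field, _PAGE_PTE is forced
 * set on the pte side, and _PAGE_PRESENT/_PAGE_INVALID stay clear, so
 * pte_present() above never mistakes a swap entry for a mapped page.
 */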
    777
    778#ifdef CONFIG_MEM_SOFT_DIRTY
    779#define _PAGE_SWP_SOFT_DIRTY	_PAGE_SOFT_DIRTY
    780#else
    781#define _PAGE_SWP_SOFT_DIRTY	0UL
    782#endif /* CONFIG_MEM_SOFT_DIRTY */
    783
    784#define _PAGE_SWP_EXCLUSIVE	_PAGE_NON_IDEMPOTENT
    785
    786#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
    787static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
    788{
    789	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
    790}
    791
    792static inline bool pte_swp_soft_dirty(pte_t pte)
    793{
    794	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
    795}
    796
    797static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
    798{
    799	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
    800}
    801#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
    802
    803#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
    804static inline pte_t pte_swp_mkexclusive(pte_t pte)
    805{
    806	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
    807}
    808
    809static inline int pte_swp_exclusive(pte_t pte)
    810{
    811	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
    812}
    813
    814static inline pte_t pte_swp_clear_exclusive(pte_t pte)
    815{
    816	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_EXCLUSIVE));
    817}
    818
    819static inline bool check_pte_access(unsigned long access, unsigned long ptev)
    820{
    821	/*
    822	 * This checks the _PAGE_RWX and _PAGE_PRESENT bits
    823	 */
    824	if (access & ~ptev)
    825		return false;
    826	/*
    827	 * This checks for access to privileged space
    828	 */
    829	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
    830		return false;
    831
    832	return true;
    833}
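/*
 * Example (illustrative): a user read fault typically passes
 * access = _PAGE_PRESENT | _PAGE_READ.  A pte without _PAGE_READ fails the
 * first test because access & ~ptev is non-zero, and a kernel-only pte
 * (_PAGE_PRIVILEGED set) fails the second test for a user access.
 */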
    834/*
    835 * Generic functions with hash/radix callbacks
    836 */
    837
    838static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
    839					   pte_t *ptep, pte_t entry,
    840					   unsigned long address,
    841					   int psize)
    842{
    843	if (radix_enabled())
    844		return radix__ptep_set_access_flags(vma, ptep, entry,
    845						    address, psize);
    846	return hash__ptep_set_access_flags(ptep, entry);
    847}
    848
    849#define __HAVE_ARCH_PTE_SAME
    850static inline int pte_same(pte_t pte_a, pte_t pte_b)
    851{
    852	if (radix_enabled())
    853		return radix__pte_same(pte_a, pte_b);
    854	return hash__pte_same(pte_a, pte_b);
    855}
    856
    857static inline int pte_none(pte_t pte)
    858{
    859	if (radix_enabled())
    860		return radix__pte_none(pte);
    861	return hash__pte_none(pte);
    862}
    863
    864static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
    865				pte_t *ptep, pte_t pte, int percpu)
    866{
    867
    868	VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
    869	/*
    870	 * Keep the _PAGE_PTE bit set until we are sure we handle _PAGE_PTE
    871	 * in all the callers.
    872	 */
    873	pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
    874
    875	if (radix_enabled())
    876		return radix__set_pte_at(mm, addr, ptep, pte, percpu);
    877	return hash__set_pte_at(mm, addr, ptep, pte, percpu);
    878}
    879
    880#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
    881
    882#define pgprot_noncached pgprot_noncached
    883static inline pgprot_t pgprot_noncached(pgprot_t prot)
    884{
    885	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
    886			_PAGE_NON_IDEMPOTENT);
    887}
    888
    889#define pgprot_noncached_wc pgprot_noncached_wc
    890static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
    891{
    892	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
    893			_PAGE_TOLERANT);
    894}
    895
    896#define pgprot_cached pgprot_cached
    897static inline pgprot_t pgprot_cached(pgprot_t prot)
    898{
    899	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
    900}
    901
    902#define pgprot_writecombine pgprot_writecombine
    903static inline pgprot_t pgprot_writecombine(pgprot_t prot)
    904{
    905	return pgprot_noncached_wc(prot);
    906}
    907/*
    908 * Check whether a pte mapping has the cache-inhibited property
    909 */
    910static inline bool pte_ci(pte_t pte)
    911{
    912	__be64 pte_v = pte_raw(pte);
    913
    914	if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
    915	    ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
    916		return true;
    917	return false;
    918}
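/*
 * Mapping sketch for the cache-control field above (illustrative):
 * pgprot_noncached() selects _PAGE_NON_IDEMPOTENT (cache-inhibited, guarded
 * I/O), pgprot_noncached_wc() selects _PAGE_TOLERANT (cache-inhibited but
 * tolerant, used for write-combining) and pgprot_cached() clears the field.
 * pte_ci() reports true for either cache-inhibited encoding, but not for
 * _PAGE_SAO or plain cacheable ptes.
 */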
    919
    920static inline void pmd_clear(pmd_t *pmdp)
    921{
    922	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
    923		/*
    924		 * Don't use this if we can possibly have a hash page table
    925		 * entry mapping this.
    926		 */
    927		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
    928	}
    929	*pmdp = __pmd(0);
    930}
    931
    932static inline int pmd_none(pmd_t pmd)
    933{
    934	return !pmd_raw(pmd);
    935}
    936
    937static inline int pmd_present(pmd_t pmd)
    938{
    939	/*
    940	 * A pmd is considered present if _PAGE_PRESENT is set.
    941	 * We also need to treat as present a pmd that is marked
    942	 * invalid during a split. Hence we look for _PAGE_INVALID
    943	 * if we find _PAGE_PRESENT cleared.
    944	 */
    945	if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
    946		return true;
    947
    948	return false;
    949}
    950
    951static inline int pmd_is_serializing(pmd_t pmd)
    952{
    953	/*
    954	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
    955	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
    956	 *
    957	 * This condition may also occur when flushing a pmd while flushing
    958	 * it (see ptep_modify_prot_start), so callers must ensure this
    959	 * case is fine as well.
    960	 */
    961	if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
    962						cpu_to_be64(_PAGE_INVALID))
    963		return true;
    964
    965	return false;
    966}
    967
    968static inline int pmd_bad(pmd_t pmd)
    969{
    970	if (radix_enabled())
    971		return radix__pmd_bad(pmd);
    972	return hash__pmd_bad(pmd);
    973}
    974
    975static inline void pud_clear(pud_t *pudp)
    976{
    977	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
    978		/*
    979		 * Don't use this if we can possibly have a hash page table
    980		 * entry mapping this.
    981		 */
    982		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
    983	}
    984	*pudp = __pud(0);
    985}
    986
    987static inline int pud_none(pud_t pud)
    988{
    989	return !pud_raw(pud);
    990}
    991
    992static inline int pud_present(pud_t pud)
    993{
    994	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
    995}
    996
    997extern struct page *pud_page(pud_t pud);
    998extern struct page *pmd_page(pmd_t pmd);
    999static inline pte_t pud_pte(pud_t pud)
   1000{
   1001	return __pte_raw(pud_raw(pud));
   1002}
   1003
   1004static inline pud_t pte_pud(pte_t pte)
   1005{
   1006	return __pud_raw(pte_raw(pte));
   1007}
   1008#define pud_write(pud)		pte_write(pud_pte(pud))
   1009
   1010static inline int pud_bad(pud_t pud)
   1011{
   1012	if (radix_enabled())
   1013		return radix__pud_bad(pud);
   1014	return hash__pud_bad(pud);
   1015}
   1016
   1017#define pud_access_permitted pud_access_permitted
   1018static inline bool pud_access_permitted(pud_t pud, bool write)
   1019{
   1020	return pte_access_permitted(pud_pte(pud), write);
   1021}
   1022
   1023#define __p4d_raw(x)	((p4d_t) { __pgd_raw(x) })
   1024static inline __be64 p4d_raw(p4d_t x)
   1025{
   1026	return pgd_raw(x.pgd);
   1027}
   1028
   1029#define p4d_write(p4d)		pte_write(p4d_pte(p4d))
   1030
   1031static inline void p4d_clear(p4d_t *p4dp)
   1032{
   1033	*p4dp = __p4d(0);
   1034}
   1035
   1036static inline int p4d_none(p4d_t p4d)
   1037{
   1038	return !p4d_raw(p4d);
   1039}
   1040
   1041static inline int p4d_present(p4d_t p4d)
   1042{
   1043	return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT));
   1044}
   1045
   1046static inline pte_t p4d_pte(p4d_t p4d)
   1047{
   1048	return __pte_raw(p4d_raw(p4d));
   1049}
   1050
   1051static inline p4d_t pte_p4d(pte_t pte)
   1052{
   1053	return __p4d_raw(pte_raw(pte));
   1054}
   1055
   1056static inline int p4d_bad(p4d_t p4d)
   1057{
   1058	if (radix_enabled())
   1059		return radix__p4d_bad(p4d);
   1060	return hash__p4d_bad(p4d);
   1061}
   1062
   1063#define p4d_access_permitted p4d_access_permitted
   1064static inline bool p4d_access_permitted(p4d_t p4d, bool write)
   1065{
   1066	return pte_access_permitted(p4d_pte(p4d), write);
   1067}
   1068
   1069extern struct page *p4d_page(p4d_t p4d);
   1070
   1071/* Pointers in the page table tree are physical addresses */
   1072#define __pgtable_ptr_val(ptr)	__pa(ptr)
   1073
   1074static inline pud_t *p4d_pgtable(p4d_t p4d)
   1075{
   1076	return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS);
   1077}
   1078
   1079static inline pmd_t *pud_pgtable(pud_t pud)
   1080{
   1081	return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
   1082}
   1083
   1084#define pte_ERROR(e) \
   1085	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
   1086#define pmd_ERROR(e) \
   1087	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
   1088#define pud_ERROR(e) \
   1089	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
   1090#define pgd_ERROR(e) \
   1091	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
   1092
   1093static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
   1094{
   1095	if (radix_enabled()) {
   1096#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
   1097		unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
   1098		WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
   1099#endif
   1100		return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
   1101	}
   1102	return hash__map_kernel_page(ea, pa, prot);
   1103}
   1104
   1105void unmap_kernel_page(unsigned long va);
   1106
   1107static inline int __meminit vmemmap_create_mapping(unsigned long start,
   1108						   unsigned long page_size,
   1109						   unsigned long phys)
   1110{
   1111	if (radix_enabled())
   1112		return radix__vmemmap_create_mapping(start, page_size, phys);
   1113	return hash__vmemmap_create_mapping(start, page_size, phys);
   1114}
   1115
   1116#ifdef CONFIG_MEMORY_HOTPLUG
   1117static inline void vmemmap_remove_mapping(unsigned long start,
   1118					  unsigned long page_size)
   1119{
   1120	if (radix_enabled())
   1121		return radix__vmemmap_remove_mapping(start, page_size);
   1122	return hash__vmemmap_remove_mapping(start, page_size);
   1123}
   1124#endif
   1125
   1126#ifdef CONFIG_DEBUG_PAGEALLOC
   1127static inline void __kernel_map_pages(struct page *page, int numpages, int enable)
   1128{
   1129	if (radix_enabled())
   1130		radix__kernel_map_pages(page, numpages, enable);
   1131	else
   1132		hash__kernel_map_pages(page, numpages, enable);
   1133}
   1134#endif
   1135
   1136static inline pte_t pmd_pte(pmd_t pmd)
   1137{
   1138	return __pte_raw(pmd_raw(pmd));
   1139}
   1140
   1141static inline pmd_t pte_pmd(pte_t pte)
   1142{
   1143	return __pmd_raw(pte_raw(pte));
   1144}
   1145
   1146static inline pte_t *pmdp_ptep(pmd_t *pmd)
   1147{
   1148	return (pte_t *)pmd;
   1149}
   1150#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
   1151#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
   1152#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
   1153#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
   1154#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
   1155#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
   1156#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
   1157#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
   1158#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
   1159#define pmd_mk_savedwrite(pmd)	pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
   1160#define pmd_clear_savedwrite(pmd)	pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
   1161
   1162#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
   1163#define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
   1164#define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
   1165#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
   1166
   1167#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
   1168#define pmd_swp_mksoft_dirty(pmd)	pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
   1169#define pmd_swp_soft_dirty(pmd)		pte_swp_soft_dirty(pmd_pte(pmd))
   1170#define pmd_swp_clear_soft_dirty(pmd)	pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
   1171#endif
   1172#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
   1173
   1174#ifdef CONFIG_NUMA_BALANCING
   1175static inline int pmd_protnone(pmd_t pmd)
   1176{
   1177	return pte_protnone(pmd_pte(pmd));
   1178}
   1179#endif /* CONFIG_NUMA_BALANCING */
   1180
   1181#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
   1182#define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
   1183#define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))
   1184
   1185#define pmd_access_permitted pmd_access_permitted
   1186static inline bool pmd_access_permitted(pmd_t pmd, bool write)
   1187{
   1188	/*
   1189	 * pmdp_invalidate sets this combination (which is not caught by
   1190	 * !pte_present() check in pte_access_permitted), to prevent
   1191	 * lock-free lookups, as part of the serialize_against_pte_lookup()
   1192	 * synchronisation.
   1193	 *
   1194	 * This also catches the case where the PTE's hardware PRESENT bit is
   1195	 * cleared while TLB is flushed, which is suboptimal but should not
   1196	 * be frequent.
   1197	 */
   1198	if (pmd_is_serializing(pmd))
   1199		return false;
   1200
   1201	return pte_access_permitted(pmd_pte(pmd), write);
   1202}
   1203
   1204#ifdef CONFIG_TRANSPARENT_HUGEPAGE
   1205extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
   1206extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
   1207extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
   1208extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
   1209		       pmd_t *pmdp, pmd_t pmd);
   1210static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
   1211					unsigned long addr, pmd_t *pmd)
   1212{
   1213}
   1214
   1215extern int hash__has_transparent_hugepage(void);
   1216static inline int has_transparent_hugepage(void)
   1217{
   1218	if (radix_enabled())
   1219		return radix__has_transparent_hugepage();
   1220	return hash__has_transparent_hugepage();
   1221}
   1222#define has_transparent_hugepage has_transparent_hugepage
   1223
   1224static inline unsigned long
   1225pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
   1226		    unsigned long clr, unsigned long set)
   1227{
   1228	if (radix_enabled())
   1229		return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
   1230	return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
   1231}
   1232
   1233/*
   1234 * Returns true for pmd migration entries, THP, devmap and hugetlb,
   1235 * but is compile-time dependent on the THP config.
   1236 */
   1237static inline int pmd_large(pmd_t pmd)
   1238{
   1239	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
   1240}
   1241
   1242/*
   1243 * For radix we should always find H_PAGE_HASHPTE zero. Hence
   1244 * the below will work for radix too.
   1245 */
   1246static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
   1247					      unsigned long addr, pmd_t *pmdp)
   1248{
   1249	unsigned long old;
   1250
   1251	if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
   1252		return 0;
   1253	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
   1254	return ((old & _PAGE_ACCESSED) != 0);
   1255}
   1256
   1257#define __HAVE_ARCH_PMDP_SET_WRPROTECT
   1258static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
   1259				      pmd_t *pmdp)
   1260{
   1261	if (__pmd_write((*pmdp)))
   1262		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
   1263	else if (unlikely(pmd_savedwrite(*pmdp)))
   1264		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
   1265}
   1266
   1267/*
   1268 * Only returns true for a THP. False for pmd migration entry.
   1269 * We also need to return true when we come across a pmd that is
   1270 * in the middle of a thp split. While splitting THP, we mark the pmd
   1271 * invalid (pmdp_invalidate()) before we set it with pte page
   1272 * address. A pmd_trans_huge() check against a pmd entry during that time
   1273 * should return true.
   1274 * We should not call this on a hugetlb entry. We should check for HugeTLB
   1275 * entry using vma->vm_flags
   1276 * The page table walk rule is explained in Documentation/vm/transhuge.rst
   1277 */
   1278static inline int pmd_trans_huge(pmd_t pmd)
   1279{
   1280	if (!pmd_present(pmd))
   1281		return false;
   1282
   1283	if (radix_enabled())
   1284		return radix__pmd_trans_huge(pmd);
   1285	return hash__pmd_trans_huge(pmd);
   1286}
   1287
   1288#define __HAVE_ARCH_PMD_SAME
   1289static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
   1290{
   1291	if (radix_enabled())
   1292		return radix__pmd_same(pmd_a, pmd_b);
   1293	return hash__pmd_same(pmd_a, pmd_b);
   1294}
   1295
   1296static inline pmd_t __pmd_mkhuge(pmd_t pmd)
   1297{
   1298	if (radix_enabled())
   1299		return radix__pmd_mkhuge(pmd);
   1300	return hash__pmd_mkhuge(pmd);
   1301}
   1302
   1303/*
   1304 * pfn_pmd() returns a pmd_t that can be used as a pmd pte entry.
   1305 */
   1306static inline pmd_t pmd_mkhuge(pmd_t pmd)
   1307{
   1308#ifdef CONFIG_DEBUG_VM
   1309	if (radix_enabled())
   1310		WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0);
   1311	else
   1312		WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) !=
   1313			cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE));
   1314#endif
   1315	return pmd;
   1316}
   1317
   1318#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
   1319extern int pmdp_set_access_flags(struct vm_area_struct *vma,
   1320				 unsigned long address, pmd_t *pmdp,
   1321				 pmd_t entry, int dirty);
   1322
   1323#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
   1324extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
   1325				     unsigned long address, pmd_t *pmdp);
   1326
   1327#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
   1328static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
   1329					    unsigned long addr, pmd_t *pmdp)
   1330{
   1331	if (radix_enabled())
   1332		return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
   1333	return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
   1334}
   1335
   1336static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
   1337					unsigned long address, pmd_t *pmdp)
   1338{
   1339	if (radix_enabled())
   1340		return radix__pmdp_collapse_flush(vma, address, pmdp);
   1341	return hash__pmdp_collapse_flush(vma, address, pmdp);
   1342}
   1343#define pmdp_collapse_flush pmdp_collapse_flush
   1344
   1345#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
   1346pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
   1347				   unsigned long addr,
   1348				   pmd_t *pmdp, int full);
   1349
   1350#define __HAVE_ARCH_PGTABLE_DEPOSIT
   1351static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
   1352					      pmd_t *pmdp, pgtable_t pgtable)
   1353{
   1354	if (radix_enabled())
   1355		return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
   1356	return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
   1357}
   1358
   1359#define __HAVE_ARCH_PGTABLE_WITHDRAW
   1360static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
   1361						    pmd_t *pmdp)
   1362{
   1363	if (radix_enabled())
   1364		return radix__pgtable_trans_huge_withdraw(mm, pmdp);
   1365	return hash__pgtable_trans_huge_withdraw(mm, pmdp);
   1366}
   1367
   1368#define __HAVE_ARCH_PMDP_INVALIDATE
   1369extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
   1370			     pmd_t *pmdp);
   1371
   1372#define pmd_move_must_withdraw pmd_move_must_withdraw
   1373struct spinlock;
   1374extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
   1375				  struct spinlock *old_pmd_ptl,
   1376				  struct vm_area_struct *vma);
   1377/*
   1378 * Hash translation mode uses the deposited table to store hash pte
   1379 * slot information.
   1380 */
   1381#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
   1382static inline bool arch_needs_pgtable_deposit(void)
   1383{
   1384	if (radix_enabled())
   1385		return false;
   1386	return true;
   1387}
   1388extern void serialize_against_pte_lookup(struct mm_struct *mm);
   1389
   1390
   1391static inline pmd_t pmd_mkdevmap(pmd_t pmd)
   1392{
   1393	if (radix_enabled())
   1394		return radix__pmd_mkdevmap(pmd);
   1395	return hash__pmd_mkdevmap(pmd);
   1396}
   1397
   1398static inline int pmd_devmap(pmd_t pmd)
   1399{
   1400	return pte_devmap(pmd_pte(pmd));
   1401}
   1402
   1403static inline int pud_devmap(pud_t pud)
   1404{
   1405	return 0;
   1406}
   1407
   1408static inline int pgd_devmap(pgd_t pgd)
   1409{
   1410	return 0;
   1411}
   1412#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
   1413
   1414static inline int pud_pfn(pud_t pud)
   1415{
   1416	/*
   1417	 * Currently all calls to pud_pfn() are gated around a pud_devmap()
   1418	 * check so this should never be used. If it grows another user we
   1419	 * want to know about it.
   1420	 */
   1421	BUILD_BUG();
   1422	return 0;
   1423}
   1424#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
   1425pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
   1426void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
   1427			     pte_t *, pte_t, pte_t);
   1428
   1429/*
   1430 * Returns true for an R -> RW upgrade of a pte
   1431 */
   1432static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
   1433{
   1434	if (!(old_val & _PAGE_READ))
   1435		return false;
   1436
   1437	if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
   1438		return true;
   1439
   1440	return false;
   1441}
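/*
 * Truth-table sketch: R -> RW returns true; RW -> RW, R -> R and
 * no-access -> RW all return false, because either the old value lacked
 * _PAGE_READ or the write bit did not newly appear.
 */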
   1442
   1443/*
   1444 * Like pmd_huge() and pmd_large(), but works regardless of config options
   1445 */
   1446#define pmd_is_leaf pmd_is_leaf
   1447#define pmd_leaf pmd_is_leaf
   1448static inline bool pmd_is_leaf(pmd_t pmd)
   1449{
   1450	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
   1451}
   1452
   1453#define pud_is_leaf pud_is_leaf
   1454#define pud_leaf pud_is_leaf
   1455static inline bool pud_is_leaf(pud_t pud)
   1456{
   1457	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
   1458}
   1459
   1460#define p4d_is_leaf p4d_is_leaf
   1461#define p4d_leaf p4d_is_leaf
   1462static inline bool p4d_is_leaf(p4d_t p4d)
   1463{
   1464	return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE));
   1465}
   1466
   1467#endif /* __ASSEMBLY__ */
   1468#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */