cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

hugetlbpage.c (9915B)


// SPDX-License-Identifier: GPL-2.0
/*
 *  IBM System z Huge TLB Page Support for Kernel.
 *
 *    Copyright IBM Corp. 2007,2020
 *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <asm/pgalloc.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/security.h>

/*
 * If the bit selected by single-bit bitmask "a" is set within "x", move
 * it to the position indicated by single-bit bitmask "b".
 */
#define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b))
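/*
 * For example, move_set_bit(0xf4, 0x04, 0x40) masks out bit 2 of 0xf4
 * (which is set), shifts it right by ilog2(0x04) = 2 and left by
 * ilog2(0x40) = 6, returning 0x40; with bit 2 clear the result is 0.
 */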

static inline unsigned long __pte_to_rste(pte_t pte)
{
        unsigned long rste;

        /*
         * Convert encoding            pte bits      pmd / pud bits
         *                          lIR.uswrdy.p    dy..R...I...wr
         * empty                    010.000000.0 -> 00..0...1...00
         * prot-none, clean, old    111.000000.1 -> 00..1...1...00
         * prot-none, clean, young  111.000001.1 -> 01..1...1...00
         * prot-none, dirty, old    111.000010.1 -> 10..1...1...00
         * prot-none, dirty, young  111.000011.1 -> 11..1...1...00
         * read-only, clean, old    111.000100.1 -> 00..1...1...01
         * read-only, clean, young  101.000101.1 -> 01..1...0...01
         * read-only, dirty, old    111.000110.1 -> 10..1...1...01
         * read-only, dirty, young  101.000111.1 -> 11..1...0...01
         * read-write, clean, old   111.001100.1 -> 00..1...1...11
         * read-write, clean, young 101.001101.1 -> 01..1...0...11
         * read-write, dirty, old   110.001110.1 -> 10..0...1...11
         * read-write, dirty, young 100.001111.1 -> 11..0...0...11
         * HW-bits: R read-only, I invalid
         * SW-bits: p present, y young, d dirty, r read, w write, s special,
         *          u unused, l large
         */
        if (pte_present(pte)) {
                rste = pte_val(pte) & PAGE_MASK;
                rste |= move_set_bit(pte_val(pte), _PAGE_READ,
                                     _SEGMENT_ENTRY_READ);
                rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
                                     _SEGMENT_ENTRY_WRITE);
                rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
                                     _SEGMENT_ENTRY_INVALID);
                rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
                                     _SEGMENT_ENTRY_PROTECT);
                rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
                                     _SEGMENT_ENTRY_DIRTY);
                rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
                                     _SEGMENT_ENTRY_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
                rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
                                     _SEGMENT_ENTRY_SOFT_DIRTY);
#endif
                rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
                                     _SEGMENT_ENTRY_NOEXEC);
        } else
                rste = _SEGMENT_ENTRY_EMPTY;
        return rste;
}

static inline pte_t __rste_to_pte(unsigned long rste)
{
        unsigned long pteval;
        int present;

        if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
                present = pud_present(__pud(rste));
        else
                present = pmd_present(__pmd(rste));

        /*
         * Convert encoding          pmd / pud bits      pte bits
         *                          dy..R...I...wr    lIR.uswrdy.p
         * empty                    00..0...1...00 -> 010.000000.0
         * prot-none, clean, old    00..1...1...00 -> 111.000000.1
         * prot-none, clean, young  01..1...1...00 -> 111.000001.1
         * prot-none, dirty, old    10..1...1...00 -> 111.000010.1
         * prot-none, dirty, young  11..1...1...00 -> 111.000011.1
         * read-only, clean, old    00..1...1...01 -> 111.000100.1
         * read-only, clean, young  01..1...0...01 -> 101.000101.1
         * read-only, dirty, old    10..1...1...01 -> 111.000110.1
         * read-only, dirty, young  11..1...0...01 -> 101.000111.1
         * read-write, clean, old   00..1...1...11 -> 111.001100.1
         * read-write, clean, young 01..1...0...11 -> 101.001101.1
         * read-write, dirty, old   10..0...1...11 -> 110.001110.1
         * read-write, dirty, young 11..0...0...11 -> 100.001111.1
         * HW-bits: R read-only, I invalid
         * SW-bits: p present, y young, d dirty, r read, w write, s special,
         *          u unused, l large
         */
        if (present) {
                pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
                pteval |= _PAGE_LARGE | _PAGE_PRESENT;
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ);
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE);
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID);
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT);
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY);
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
#endif
                pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
        } else
                pteval = _PAGE_INVALID;
        return __pte(pteval);
}

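/*
 * Initialize the storage keys of a huge page range on first use: if the
 * mm uses storage keys and the entry is valid, PG_arch_1 on the head page
 * records whether the keys for the backing 1M/2G range have already been
 * set up, so __storage_key_init_range() runs at most once per huge page.
 */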
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
{
        struct page *page;
        unsigned long size, paddr;

        if (!mm_uses_skeys(mm) ||
            rste & _SEGMENT_ENTRY_INVALID)
                return;

        if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
                page = pud_page(__pud(rste));
                size = PUD_SIZE;
                paddr = rste & PUD_MASK;
        } else {
                page = pmd_page(__pmd(rste));
                size = PMD_SIZE;
                paddr = rste & PMD_MASK;
        }

        if (!test_and_set_bit(PG_arch_1, &page->flags))
                __storage_key_init_range(paddr, paddr + size - 1);
}

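/*
 * Install a huge page table entry: convert the pte-style value to the
 * segment/region table format, mark it as a large entry when present,
 * initialize storage keys if needed, and write it to the table.
 */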
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        unsigned long rste;

        rste = __pte_to_rste(pte);
        if (!MACHINE_HAS_NX)
                rste &= ~_SEGMENT_ENTRY_NOEXEC;

        /* Set correct table type for 2G hugepages */
        if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
                if (likely(pte_present(pte)))
                        rste |= _REGION3_ENTRY_LARGE;
                rste |= _REGION_ENTRY_TYPE_R3;
        } else if (likely(pte_present(pte)))
                rste |= _SEGMENT_ENTRY_LARGE;

        clear_huge_pte_skeys(mm, rste);
        set_pte(ptep, __pte(rste));
}

pte_t huge_ptep_get(pte_t *ptep)
{
        return __rste_to_pte(pte_val(*ptep));
}

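/*
 * Read the current huge entry in pte format, then atomically replace it
 * with an empty segment/region entry and return the old value.
 */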
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                              unsigned long addr, pte_t *ptep)
{
        pte_t pte = huge_ptep_get(ptep);
        pmd_t *pmdp = (pmd_t *) ptep;
        pud_t *pudp = (pud_t *) ptep;

        if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
                pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
        else
                pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
        return pte;
}

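/*
 * Allocate page table levels for a huge mapping at addr and return a
 * pointer to the entry at the level matching sz: the pud entry for
 * PUD_SIZE (2G) pages, the pmd entry for PMD_SIZE (1M) pages.
 */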
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp = NULL;

        pgdp = pgd_offset(mm, addr);
        p4dp = p4d_alloc(mm, pgdp, addr);
        if (p4dp) {
                pudp = pud_alloc(mm, p4dp, addr);
                if (pudp) {
                        if (sz == PUD_SIZE)
                                return (pte_t *) pudp;
                        else if (sz == PMD_SIZE)
                                pmdp = pmd_alloc(mm, pudp, addr);
                }
        }
        return (pte_t *) pmdp;
}

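/*
 * Lookup-only counterpart of huge_pte_alloc(): walk the existing tables
 * and return the pud entry if it maps a large page, otherwise the pmd
 * entry, or NULL if the upper levels are not present.
 */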
pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp = NULL;

        pgdp = pgd_offset(mm, addr);
        if (pgd_present(*pgdp)) {
                p4dp = p4d_offset(pgdp, addr);
                if (p4d_present(*p4dp)) {
                        pudp = pud_offset(p4dp, addr);
                        if (pud_present(*pudp)) {
                                if (pud_large(*pudp))
                                        return (pte_t *) pudp;
                                pmdp = pmd_offset(pudp, addr);
                        }
                }
        }
        return (pte_t *) pmdp;
}

int pmd_huge(pmd_t pmd)
{
        return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
        return pud_large(pud);
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
                pud_t *pud, int flags)
{
        if (flags & FOLL_GET)
                return NULL;

        return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}

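/*
 * Huge page sizes usable on this machine: 1M (PMD_SIZE) segments need
 * the EDAT1 facility, 2G (PUD_SIZE) regions need EDAT2.
 */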
bool __init arch_hugetlb_valid_size(unsigned long size)
{
        if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
                return true;
        else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
                return true;
        else
                return false;
}

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct vm_unmapped_area_info info;

        info.flags = 0;
        info.length = len;
        info.low_limit = current->mm->mmap_base;
        info.high_limit = TASK_SIZE;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        return vm_unmapped_area(&info);
}

static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
                unsigned long addr0, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct vm_unmapped_area_info info;
        unsigned long addr;

        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = max(PAGE_SIZE, mmap_min_addr);
        info.high_limit = current->mm->mmap_base;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        if (addr & ~PAGE_MASK) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
                info.high_limit = TASK_SIZE;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

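/*
 * get_unmapped_area() variant for hugetlb mappings: validate length and
 * alignment, honour MAP_FIXED and any address hint, otherwise search
 * bottom-up or top-down depending on the mm layout, and finally apply
 * the asce limit check.
 */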
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;

        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > TASK_SIZE - mmap_min_addr)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                goto check_asce_limit;
        }

        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
                    (!vma || addr + len <= vm_start_gap(vma)))
                        goto check_asce_limit;
        }

        if (mm->get_unmapped_area == arch_get_unmapped_area)
                addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
        if (offset_in_page(addr))
                return addr;

check_asce_limit:
        return check_asce_limit(mm, addr, len);
}