cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vmem.c (14868B)


// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2006
 */

#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

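/* Allocate pages from the buddy allocator once slab is up, from memblock during early boot. */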
static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return memblock_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
		return;
	free_pages(addr, order);
}

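/* Allocate a region or segment (CRST) table and initialize every entry to @val. */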
void *vmem_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
	if (table)
		crst_table_init(table, val);
	return table;
}

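/* Allocate a page table and mark all of its entries as invalid. */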
pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_alloc(size, size);
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static void vmem_pte_free(unsigned long *table)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
		return;
	page_table_free(&init_mm, table);
}

#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_sub_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_sub_pmd_start;

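/* Mark the not-yet-used remainder of the last partially populated PMD as unused. */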
static void vmemmap_flush_unused_sub_pmd(void)
{
	if (!unused_sub_pmd_start)
		return;
	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
	unused_sub_pmd_start = 0;
}

static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page from
	 * getting removed (just in case the memmap never gets initialized,
	 * e.g., because the memory block never gets onlined).
	 */
	memset((void *)start, 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_sub_pmd_start == start) {
		unused_sub_pmd_start = end;
		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
			unused_sub_pmd_start = 0;
		return;
	}
	vmemmap_flush_unused_sub_pmd();
	vmemmap_mark_sub_pmd_used(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();

	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
	vmemmap_mark_sub_pmd_used(start, end);

	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset((void *)page, PAGE_UNUSED, start - page);
	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. For the last added PMD, remember the last
	 * unused range in the populated PMD.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_sub_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();
	memset((void *)start, PAGE_UNUSED, end - start);
	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}

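/*
 * Walk the PTE level for [addr, end): on add, populate missing entries
 * (backed by freshly allocated pages in the vmemmap case); on removal,
 * clear present entries and free the vmemmap backing pages. "direct"
 * selects the 1:1 kernel mapping rather than the vmemmap.
 */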
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long prot, pages = 0;
	int ret = -ENOMEM;
	pte_t *pte;

	prot = pgprot_val(PAGE_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_PAGE_NOEXEC;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (!add) {
			if (pte_none(*pte))
				continue;
			if (!direct)
				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
			pte_clear(&init_mm, addr, pte);
		} else if (pte_none(*pte)) {
			if (!direct) {
				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

				if (!new_page)
					goto out;
				set_pte(pte, __pte(__pa(new_page) | prot));
			} else {
				set_pte(pte, __pte(__pa(addr) | prot));
			}
		} else {
			continue;
		}
		pages++;
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
	return ret;
}

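/* Free the PTE table behind @pmd if all of its entries are empty. */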
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
	pte_t *pte;
	int i;

	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
	pte = pte_offset_kernel(pmd, start);
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		if (!pte_none(*pte))
			return;
	}
	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
	pmd_clear(pmd);
}

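/*
 * Walk the PMD level for [addr, end). Fully covered PMDs may be mapped
 * with 1MB large pages (EDAT1); partially covered vmemmap PMDs are
 * tracked with PAGE_UNUSED so they can be freed once no section uses
 * them anymore. Everything else is delegated to modify_pte_table().
 */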
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pmd_t *pmd;
	pte_t *pte;

	prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (!add) {
			if (pmd_none(*pmd))
				continue;
			if (pmd_large(*pmd)) {
				if (IS_ALIGNED(addr, PMD_SIZE) &&
				    IS_ALIGNED(next, PMD_SIZE)) {
					if (!direct)
						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
					pages++;
				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
				}
				continue;
			}
		} else if (pmd_none(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE) &&
			    MACHINE_HAS_EDAT1 && addr && direct &&
			    !debug_pagealloc_enabled()) {
				set_pmd(pmd, __pmd(__pa(addr) | prot));
				pages++;
				continue;
			} else if (!direct && MACHINE_HAS_EDAT1) {
				void *new_page;

				/*
				 * Use 1MB frames for vmemmap if available. We
				 * always use large frames even if they are only
				 * partially used. Otherwise we would also end up
				 * with page tables, since vmemmap_populate gets
				 * called for each section separately.
				 */
				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
				if (new_page) {
					set_pmd(pmd, __pmd(__pa(new_page) | prot));
					if (!IS_ALIGNED(addr, PMD_SIZE) ||
					    !IS_ALIGNED(next, PMD_SIZE)) {
						vmemmap_use_new_sub_pmd(addr, next);
					}
					continue;
				}
			}
			pte = vmem_pte_alloc();
			if (!pte)
				goto out;
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			if (!direct)
				vmemmap_use_sub_pmd(addr, next);
			continue;
		}
		ret = modify_pte_table(pmd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pte_table(pmd, addr & PMD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
	return ret;
}

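/*
 * Free the PMD table behind @pud if none of its entries are in use; tables
 * that are not fully contained in the 1:1 mapping / vmemmap area are left
 * untouched.
 */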
static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
	const unsigned long end = start + PUD_SIZE;
	pmd_t *pmd;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif
	pmd = pmd_offset(pud, start);
	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
		if (!pmd_none(*pmd))
			return;
	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
	pud_clear(pud);
}

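/*
 * Walk the PUD level for [addr, end). Fully covered PUDs of the direct
 * mapping may be mapped with 2GB large pages (EDAT2); everything else is
 * delegated to modify_pmd_table().
 */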
static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pud_t *pud;
	pmd_t *pmd;

	prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_REGION_ENTRY_NOEXEC;
	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!add) {
			if (pud_none(*pud))
				continue;
			if (pud_large(*pud)) {
				if (IS_ALIGNED(addr, PUD_SIZE) &&
				    IS_ALIGNED(next, PUD_SIZE)) {
					pud_clear(pud);
					pages++;
				}
				continue;
			}
		} else if (pud_none(*pud)) {
			if (IS_ALIGNED(addr, PUD_SIZE) &&
			    IS_ALIGNED(next, PUD_SIZE) &&
			    MACHINE_HAS_EDAT2 && addr && direct &&
			    !debug_pagealloc_enabled()) {
				set_pud(pud, __pud(__pa(addr) | prot));
				pages++;
				continue;
			}
			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!pmd)
				goto out;
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		ret = modify_pmd_table(pud, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pmd_table(pud, addr & PUD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
	return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
	const unsigned long end = start + P4D_SIZE;
	pud_t *pud;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif

	pud = pud_offset(p4d, start);
	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		if (!pud_none(*pud))
			return;
	}
	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
	p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next;
	int ret = -ENOMEM;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (!add) {
			if (p4d_none(*p4d))
				continue;
		} else if (p4d_none(*p4d)) {
			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!pud)
				goto out;
			p4d_populate(&init_mm, p4d, pud);
		}
		ret = modify_pud_table(p4d, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pud_table(p4d, addr & P4D_MASK);
	}
	ret = 0;
out:
	return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
	const unsigned long end = start + PGDIR_SIZE;
	p4d_t *p4d;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif

	p4d = p4d_offset(pgd, start);
	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
		if (!p4d_none(*p4d))
			return;
	}
	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
	pgd_clear(pgd);
}

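/*
 * Top-level walker: add or remove kernel mappings for [start, end), either
 * in the identity (direct) mapping or in the vmemmap. On removal, empty
 * intermediate tables are freed and the TLB is flushed.
 */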
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
			    bool direct)
{
	unsigned long addr, next;
	int ret = -ENOMEM;
	pgd_t *pgd;
	p4d_t *p4d;

	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
		return -EINVAL;
	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!add) {
			if (pgd_none(*pgd))
				continue;
		} else if (pgd_none(*pgd)) {
			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!p4d)
				goto out;
			pgd_populate(&init_mm, pgd, p4d);
		}
		ret = modify_p4d_table(pgd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_p4d_table(pgd, addr & PGDIR_MASK);
	}
	ret = 0;
out:
	if (!add)
		flush_tlb_kernel_range(start, end);
	return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
	return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	remove_pagetable(start, start + size, true);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int ret;

	mutex_lock(&vmem_mutex);
	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
	ret = add_pagetable(start, end, false);
	if (ret)
		remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
	return ret;
}

void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
	mutex_lock(&vmem_mutex);
	remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
}

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
	mutex_lock(&vmem_mutex);
	vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
}

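/* Memory can only be hotplugged into the range covered by the identity mapping, i.e. below VMEM_MAX_PHYS. */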
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = 0;
	mhp_range.end = VMEM_MAX_PHYS - 1;
	return mhp_range;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct range range = arch_get_mappable_range();
	int ret;

	if (start < range.start ||
	    start + size > range.end + 1 ||
	    start + size < start)
		return -ERANGE;

	mutex_lock(&vmem_mutex);
	ret = vmem_add_range(start, size);
	if (ret)
		vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for the vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	phys_addr_t base, end;
	u64 i;

	for_each_mem_range(i, &base, &end)
		vmem_add_range(base, end - base);
	__set_memory((unsigned long)_stext,
		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory((unsigned long)_etext,
		     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
		     SET_MEMORY_RO);
	__set_memory((unsigned long)_sinittext,
		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);

	/* lowcore must be executable for LPSWE */
	if (!static_key_enabled(&cpu_has_bear))
		set_memory_x(0, 1);

	pr_info("Write protected kernel read-only data: %luk\n",
		(unsigned long)(__end_rodata - _stext) >> 10);
}