cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sparse-vmemmap.c (20710B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Virtual Memory Map support
      4 *
      5 * (C) 2007 sgi. Christoph Lameter.
      6 *
      7 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
      8 * virt_to_page, page_address() to be implemented as a base offset
      9 * calculation without memory access.
     10 *
     11 * However, virtual mappings need a page table and TLBs. Many Linux
     12 * architectures already map their physical space using 1-1 mappings
     13 * via TLBs. For those arches the virtual memory map is essentially
     14 * for free if we use the same page size as the 1-1 mappings. In that
     15 * case the overhead consists of a few additional pages that are
     16 * allocated to create a view of memory for vmemmap.
     17 *
     18 * The architecture is expected to provide a vmemmap_populate() function
     19 * to instantiate the mapping.
     20 */
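        /*
         * Illustrative sketch (not part of this file): with a virtually mapped
         * memmap, the generic pfn/page conversions reduce to pointer arithmetic
         * against the architecture-provided vmemmap base, roughly:
         *
         *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
         *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
         *
         * The exact definitions live in asm-generic/memory_model.h and the
         * per-architecture headers.
         */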
     21#include <linux/mm.h>
     22#include <linux/mmzone.h>
     23#include <linux/memblock.h>
     24#include <linux/memremap.h>
     25#include <linux/highmem.h>
     26#include <linux/slab.h>
     27#include <linux/spinlock.h>
     28#include <linux/vmalloc.h>
     29#include <linux/sched.h>
     30#include <linux/pgtable.h>
     31#include <linux/bootmem_info.h>
     32
     33#include <asm/dma.h>
     34#include <asm/pgalloc.h>
     35#include <asm/tlbflush.h>
     36
     37#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
     38/**
     39 * struct vmemmap_remap_walk - walk vmemmap page table
     40 *
     41 * @remap_pte:		called for each lowest-level entry (PTE).
      42 * @nr_walked:		the number of walked PTEs.
     43 * @reuse_page:		the page which is reused for the tail vmemmap pages.
     44 * @reuse_addr:		the virtual address of the @reuse_page page.
     45 * @vmemmap_pages:	the list head of the vmemmap pages that can be freed
      46 *			or are mapped from.
     47 */
     48struct vmemmap_remap_walk {
     49	void (*remap_pte)(pte_t *pte, unsigned long addr,
     50			  struct vmemmap_remap_walk *walk);
     51	unsigned long nr_walked;
     52	struct page *reuse_page;
     53	unsigned long reuse_addr;
     54	struct list_head *vmemmap_pages;
     55};
     56
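        /*
         * Split a vmemmap huge PMD in place: allocate a PTE table that maps the
         * same physical pages with base pages and install it under
         * init_mm.page_table_lock, so that individual vmemmap pages can later be
         * remapped or freed without changing what is currently mapped.
         */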
     57static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
     58{
     59	pmd_t __pmd;
     60	int i;
     61	unsigned long addr = start;
     62	struct page *page = pmd_page(*pmd);
     63	pte_t *pgtable = pte_alloc_one_kernel(&init_mm);
     64
     65	if (!pgtable)
     66		return -ENOMEM;
     67
     68	pmd_populate_kernel(&init_mm, &__pmd, pgtable);
     69
     70	for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, addr += PAGE_SIZE) {
     71		pte_t entry, *pte;
     72		pgprot_t pgprot = PAGE_KERNEL;
     73
     74		entry = mk_pte(page + i, pgprot);
     75		pte = pte_offset_kernel(&__pmd, addr);
     76		set_pte_at(&init_mm, addr, pte, entry);
     77	}
     78
     79	spin_lock(&init_mm.page_table_lock);
     80	if (likely(pmd_leaf(*pmd))) {
     81		/* Make pte visible before pmd. See comment in pmd_install(). */
     82		smp_wmb();
     83		pmd_populate_kernel(&init_mm, pmd, pgtable);
     84		flush_tlb_kernel_range(start, start + PMD_SIZE);
     85	} else {
     86		pte_free_kernel(&init_mm, pgtable);
     87	}
     88	spin_unlock(&init_mm.page_table_lock);
     89
     90	return 0;
     91}
     92
     93static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
     94{
     95	int leaf;
     96
     97	spin_lock(&init_mm.page_table_lock);
     98	leaf = pmd_leaf(*pmd);
     99	spin_unlock(&init_mm.page_table_lock);
    100
    101	if (!leaf)
    102		return 0;
    103
    104	return __split_vmemmap_huge_pmd(pmd, start);
    105}
    106
    107static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
    108			      unsigned long end,
    109			      struct vmemmap_remap_walk *walk)
    110{
    111	pte_t *pte = pte_offset_kernel(pmd, addr);
    112
    113	/*
     114	 * The reuse_page is found 'first' in the table walk, before we start
     115	 * remapping (i.e. before calling @walk->remap_pte).
    116	 */
    117	if (!walk->reuse_page) {
    118		walk->reuse_page = pte_page(*pte);
    119		/*
    120		 * Because the reuse address is part of the range that we are
    121		 * walking, skip the reuse address range.
    122		 */
    123		addr += PAGE_SIZE;
    124		pte++;
    125		walk->nr_walked++;
    126	}
    127
    128	for (; addr != end; addr += PAGE_SIZE, pte++) {
    129		walk->remap_pte(pte, addr, walk);
    130		walk->nr_walked++;
    131	}
    132}
    133
    134static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
    135			     unsigned long end,
    136			     struct vmemmap_remap_walk *walk)
    137{
    138	pmd_t *pmd;
    139	unsigned long next;
    140
    141	pmd = pmd_offset(pud, addr);
    142	do {
    143		int ret;
    144
    145		ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
    146		if (ret)
    147			return ret;
    148
    149		next = pmd_addr_end(addr, end);
    150		vmemmap_pte_range(pmd, addr, next, walk);
    151	} while (pmd++, addr = next, addr != end);
    152
    153	return 0;
    154}
    155
    156static int vmemmap_pud_range(p4d_t *p4d, unsigned long addr,
    157			     unsigned long end,
    158			     struct vmemmap_remap_walk *walk)
    159{
    160	pud_t *pud;
    161	unsigned long next;
    162
    163	pud = pud_offset(p4d, addr);
    164	do {
    165		int ret;
    166
    167		next = pud_addr_end(addr, end);
    168		ret = vmemmap_pmd_range(pud, addr, next, walk);
    169		if (ret)
    170			return ret;
    171	} while (pud++, addr = next, addr != end);
    172
    173	return 0;
    174}
    175
    176static int vmemmap_p4d_range(pgd_t *pgd, unsigned long addr,
    177			     unsigned long end,
    178			     struct vmemmap_remap_walk *walk)
    179{
    180	p4d_t *p4d;
    181	unsigned long next;
    182
    183	p4d = p4d_offset(pgd, addr);
    184	do {
    185		int ret;
    186
    187		next = p4d_addr_end(addr, end);
    188		ret = vmemmap_pud_range(p4d, addr, next, walk);
    189		if (ret)
    190			return ret;
    191	} while (p4d++, addr = next, addr != end);
    192
    193	return 0;
    194}
    195
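        /*
         * Walk [@start, @end) of the vmemmap through all page table levels and
         * invoke @walk->remap_pte on every PTE except the first one, which maps
         * the reuse page. Only [@start + PAGE_SIZE, @end) is flushed from the
         * TLB, since the first page of the range is left untouched.
         */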
    196static int vmemmap_remap_range(unsigned long start, unsigned long end,
    197			       struct vmemmap_remap_walk *walk)
    198{
    199	unsigned long addr = start;
    200	unsigned long next;
    201	pgd_t *pgd;
    202
    203	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
    204	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
    205
    206	pgd = pgd_offset_k(addr);
    207	do {
    208		int ret;
    209
    210		next = pgd_addr_end(addr, end);
    211		ret = vmemmap_p4d_range(pgd, addr, next, walk);
    212		if (ret)
    213			return ret;
    214	} while (pgd++, addr = next, addr != end);
    215
    216	/*
    217	 * We only change the mapping of the vmemmap virtual address range
    218	 * [@start + PAGE_SIZE, end), so we only need to flush the TLB which
    219	 * belongs to the range.
    220	 */
    221	flush_tlb_kernel_range(start + PAGE_SIZE, end);
    222
    223	return 0;
    224}
    225
    226/*
    227 * Free a vmemmap page. A vmemmap page can be allocated from the memblock
     228 * allocator or the buddy allocator. If the PG_reserved flag is set, the
     229 * page was allocated from the memblock allocator; free it via
     230 * free_bootmem_page(). Otherwise, use __free_page().
    231 */
    232static inline void free_vmemmap_page(struct page *page)
    233{
    234	if (PageReserved(page))
    235		free_bootmem_page(page);
    236	else
    237		__free_page(page);
    238}
    239
    240/* Free a list of the vmemmap pages */
    241static void free_vmemmap_page_list(struct list_head *list)
    242{
    243	struct page *page, *next;
    244
    245	list_for_each_entry_safe(page, next, list, lru) {
    246		list_del(&page->lru);
    247		free_vmemmap_page(page);
    248	}
    249}
    250
    251static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
    252			      struct vmemmap_remap_walk *walk)
    253{
    254	/*
    255	 * Remap the tail pages as read-only to catch illegal write operation
    256	 * to the tail pages.
    257	 */
    258	pgprot_t pgprot = PAGE_KERNEL_RO;
    259	pte_t entry = mk_pte(walk->reuse_page, pgprot);
    260	struct page *page = pte_page(*pte);
    261
    262	list_add_tail(&page->lru, walk->vmemmap_pages);
    263	set_pte_at(&init_mm, addr, pte, entry);
    264}
    265
    266/*
    267 * How many struct page structs need to be reset. When we reuse the head
    268 * struct page, the special metadata (e.g. page->flags or page->mapping)
     269 * cannot be copied to the tail struct page structs. The invalid values
     270 * will be checked in free_tail_pages_check(). In order to avoid the
     271 * "corrupted mapping in tail page" message, we need to reset at least
     272 * 3 struct page structs (one head struct page struct and two tail
     273 * struct page structs).
    274 */
    275#define NR_RESET_STRUCT_PAGE		3
    276
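        /*
         * Overwrite the first NR_RESET_STRUCT_PAGE struct pages of a freshly
         * copied vmemmap page with the contents of the struct page that follows
         * them, which is known to hold clean tail-page contents.
         */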
    277static inline void reset_struct_pages(struct page *start)
    278{
    279	int i;
    280	struct page *from = start + NR_RESET_STRUCT_PAGE;
    281
    282	for (i = 0; i < NR_RESET_STRUCT_PAGE; i++)
    283		memcpy(start + i, from, sizeof(*from));
    284}
    285
    286static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
    287				struct vmemmap_remap_walk *walk)
    288{
    289	pgprot_t pgprot = PAGE_KERNEL;
    290	struct page *page;
    291	void *to;
    292
    293	BUG_ON(pte_page(*pte) != walk->reuse_page);
    294
    295	page = list_first_entry(walk->vmemmap_pages, struct page, lru);
    296	list_del(&page->lru);
    297	to = page_to_virt(page);
    298	copy_page(to, (void *)walk->reuse_addr);
    299	reset_struct_pages(to);
    300
    301	set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
    302}
    303
    304/**
    305 * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end)
     306 *			to the page which @reuse is mapped to, then free the
     307 *			vmemmap pages which the range was mapped to.
    308 * @start:	start address of the vmemmap virtual address range that we want
    309 *		to remap.
    310 * @end:	end address of the vmemmap virtual address range that we want to
    311 *		remap.
    312 * @reuse:	reuse address.
    313 *
    314 * Return: %0 on success, negative error code otherwise.
    315 */
    316int vmemmap_remap_free(unsigned long start, unsigned long end,
    317		       unsigned long reuse)
    318{
    319	int ret;
    320	LIST_HEAD(vmemmap_pages);
    321	struct vmemmap_remap_walk walk = {
    322		.remap_pte	= vmemmap_remap_pte,
    323		.reuse_addr	= reuse,
    324		.vmemmap_pages	= &vmemmap_pages,
    325	};
    326
    327	/*
     328	 * In order to make the remapping routine most efficient for huge pages,
     329	 * the vmemmap page table walking routine obeys the following rules
     330	 * (see vmemmap_pte_range() for more details):
     331	 *
     332	 * - The range [@start, @end) and the range [@reuse, @reuse + PAGE_SIZE)
     333	 *   should be contiguous.
     334	 * - The @reuse address is part of the range [@reuse, @end) that we are
     335	 *   walking, which is passed to vmemmap_remap_range().
    336	 * - The @reuse address is the first in the complete range.
    337	 *
    338	 * So we need to make sure that @start and @reuse meet the above rules.
    339	 */
    340	BUG_ON(start - reuse != PAGE_SIZE);
    341
    342	mmap_read_lock(&init_mm);
    343	ret = vmemmap_remap_range(reuse, end, &walk);
    344	if (ret && walk.nr_walked) {
    345		end = reuse + walk.nr_walked * PAGE_SIZE;
    346		/*
    347		 * vmemmap_pages contains pages from the previous
    348		 * vmemmap_remap_range call which failed.  These
    349		 * are pages which were removed from the vmemmap.
    350		 * They will be restored in the following call.
    351		 */
    352		walk = (struct vmemmap_remap_walk) {
    353			.remap_pte	= vmemmap_restore_pte,
    354			.reuse_addr	= reuse,
    355			.vmemmap_pages	= &vmemmap_pages,
    356		};
    357
    358		vmemmap_remap_range(reuse, end, &walk);
    359	}
    360	mmap_read_unlock(&init_mm);
    361
    362	free_vmemmap_page_list(&vmemmap_pages);
    363
    364	return ret;
    365}
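        /*
         * A rough, hypothetical sketch of a caller (the real one lives in
         * mm/hugetlb_vmemmap.c): keep the first vmemmap page of a huge page's
         * struct page area and free the rest, remapping them to the kept page.
         *
         *	unsigned long reuse = (unsigned long)head_struct_page;
         *	unsigned long start = reuse + PAGE_SIZE;
         *	unsigned long end   = reuse + vmemmap_size;
         *
         *	if (vmemmap_remap_free(start, end, reuse))
         *		;	// allocation failed, keep the full vmemmap
         *
         * where head_struct_page and vmemmap_size are placeholders for the huge
         * page's first struct page and the size of its struct page area.
         */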
    366
    367static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
    368				   gfp_t gfp_mask, struct list_head *list)
    369{
    370	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
    371	int nid = page_to_nid((struct page *)start);
    372	struct page *page, *next;
    373
    374	while (nr_pages--) {
    375		page = alloc_pages_node(nid, gfp_mask, 0);
    376		if (!page)
    377			goto out;
    378		list_add_tail(&page->lru, list);
    379	}
    380
    381	return 0;
    382out:
    383	list_for_each_entry_safe(page, next, list, lru)
    384		__free_pages(page, 0);
    385	return -ENOMEM;
    386}
    387
    388/**
     389 * vmemmap_remap_alloc - remap the vmemmap virtual address range [@start, @end)
     390 *			 to the pages which are taken from the
     391 *			 @vmemmap_pages list, respectively.
    392 * @start:	start address of the vmemmap virtual address range that we want
    393 *		to remap.
    394 * @end:	end address of the vmemmap virtual address range that we want to
    395 *		remap.
    396 * @reuse:	reuse address.
    397 * @gfp_mask:	GFP flag for allocating vmemmap pages.
    398 *
    399 * Return: %0 on success, negative error code otherwise.
    400 */
    401int vmemmap_remap_alloc(unsigned long start, unsigned long end,
    402			unsigned long reuse, gfp_t gfp_mask)
    403{
    404	LIST_HEAD(vmemmap_pages);
    405	struct vmemmap_remap_walk walk = {
    406		.remap_pte	= vmemmap_restore_pte,
    407		.reuse_addr	= reuse,
    408		.vmemmap_pages	= &vmemmap_pages,
    409	};
    410
    411	/* See the comment in the vmemmap_remap_free(). */
    412	BUG_ON(start - reuse != PAGE_SIZE);
    413
    414	if (alloc_vmemmap_page_list(start, end, gfp_mask, &vmemmap_pages))
    415		return -ENOMEM;
    416
    417	mmap_read_lock(&init_mm);
    418	vmemmap_remap_range(reuse, end, &walk);
    419	mmap_read_unlock(&init_mm);
    420
    421	return 0;
    422}
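        /*
         * Note that vmemmap_remap_alloc() is the inverse of vmemmap_remap_free():
         * it allocates one fresh page per remapped vmemmap page, copies the
         * reused page's contents into each of them, and points the PTEs back at
         * the new pages, so the struct page area becomes fully backed again.
         */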
    423#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
    424
    425/*
    426 * Allocate a block of memory to be used to back the virtual memory map
    427 * or to back the page tables that are used to create the mapping.
    428 * Uses the main allocators if they are available, else bootmem.
    429 */
    430
    431static void * __ref __earlyonly_bootmem_alloc(int node,
    432				unsigned long size,
    433				unsigned long align,
    434				unsigned long goal)
    435{
    436	return memblock_alloc_try_nid_raw(size, align, goal,
    437					       MEMBLOCK_ALLOC_ACCESSIBLE, node);
    438}
    439
    440void * __meminit vmemmap_alloc_block(unsigned long size, int node)
    441{
     442	/* If the main allocator is up use that, otherwise fall back to bootmem. */
    443	if (slab_is_available()) {
    444		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
    445		int order = get_order(size);
    446		static bool warned;
    447		struct page *page;
    448
    449		page = alloc_pages_node(node, gfp_mask, order);
    450		if (page)
    451			return page_address(page);
    452
    453		if (!warned) {
    454			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
    455				   "vmemmap alloc failure: order:%u", order);
    456			warned = true;
    457		}
    458		return NULL;
    459	} else
    460		return __earlyonly_bootmem_alloc(node, size, size,
    461				__pa(MAX_DMA_ADDRESS));
    462}
    463
    464static void * __meminit altmap_alloc_block_buf(unsigned long size,
    465					       struct vmem_altmap *altmap);
    466
    467/* need to make sure size is all the same during early stage */
    468void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
    469					 struct vmem_altmap *altmap)
    470{
    471	void *ptr;
    472
    473	if (altmap)
    474		return altmap_alloc_block_buf(size, altmap);
    475
    476	ptr = sparse_buffer_alloc(size);
    477	if (!ptr)
    478		ptr = vmemmap_alloc_block(size, node);
    479	return ptr;
    480}
    481
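        /*
         * The helpers below carve vmemmap backing pages out of a device-provided
         * pfn range described by struct vmem_altmap: the next allocation starts
         * at base_pfn + reserve + alloc + align, and free - (alloc + align) pfns
         * are still available.
         */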
    482static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
    483{
    484	return altmap->base_pfn + altmap->reserve + altmap->alloc
    485		+ altmap->align;
    486}
    487
    488static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
    489{
    490	unsigned long allocated = altmap->alloc + altmap->align;
    491
    492	if (altmap->free > allocated)
    493		return altmap->free - allocated;
    494	return 0;
    495}
    496
    497static void * __meminit altmap_alloc_block_buf(unsigned long size,
    498					       struct vmem_altmap *altmap)
    499{
    500	unsigned long pfn, nr_pfns, nr_align;
    501
    502	if (size & ~PAGE_MASK) {
    503		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
    504				__func__, size);
    505		return NULL;
    506	}
    507
    508	pfn = vmem_altmap_next_pfn(altmap);
    509	nr_pfns = size >> PAGE_SHIFT;
    510	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
    511	nr_align = ALIGN(pfn, nr_align) - pfn;
    512	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
    513		return NULL;
    514
    515	altmap->alloc += nr_pfns;
    516	altmap->align += nr_align;
    517	pfn += nr_align;
    518
    519	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
    520			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
    521	return __va(__pfn_to_phys(pfn));
    522}
    523
    524void __meminit vmemmap_verify(pte_t *pte, int node,
    525				unsigned long start, unsigned long end)
    526{
    527	unsigned long pfn = pte_pfn(*pte);
    528	int actual_node = early_pfn_to_nid(pfn);
    529
    530	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
    531		pr_warn("[%lx-%lx] potential offnode page_structs\n",
    532			start, end - 1);
    533}
    534
    535pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
    536				       struct vmem_altmap *altmap,
    537				       struct page *reuse)
    538{
    539	pte_t *pte = pte_offset_kernel(pmd, addr);
    540	if (pte_none(*pte)) {
    541		pte_t entry;
    542		void *p;
    543
    544		if (!reuse) {
    545			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
    546			if (!p)
    547				return NULL;
    548		} else {
    549			/*
    550			 * When a PTE/PMD entry is freed from the init_mm
     551			 * there's a free_pages() call to this page allocated
     552			 * above. Thus this get_page() is paired with the
     553			 * put_page_testzero() on the freeing path.
     554			 * This can only be called by certain ZONE_DEVICE paths,
     555			 * and through vmemmap_populate_compound_pages() when
     556			 * slab is available.
    557			 */
    558			get_page(reuse);
    559			p = page_to_virt(reuse);
    560		}
    561		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
    562		set_pte_at(&init_mm, addr, pte, entry);
    563	}
    564	return pte;
    565}
    566
    567static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
    568{
    569	void *p = vmemmap_alloc_block(size, node);
    570
    571	if (!p)
    572		return NULL;
    573	memset(p, 0, size);
    574
    575	return p;
    576}
    577
    578pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
    579{
    580	pmd_t *pmd = pmd_offset(pud, addr);
    581	if (pmd_none(*pmd)) {
    582		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
    583		if (!p)
    584			return NULL;
    585		pmd_populate_kernel(&init_mm, pmd, p);
    586	}
    587	return pmd;
    588}
    589
    590pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
    591{
    592	pud_t *pud = pud_offset(p4d, addr);
    593	if (pud_none(*pud)) {
    594		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
    595		if (!p)
    596			return NULL;
    597		pud_populate(&init_mm, pud, p);
    598	}
    599	return pud;
    600}
    601
    602p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
    603{
    604	p4d_t *p4d = p4d_offset(pgd, addr);
    605	if (p4d_none(*p4d)) {
    606		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
    607		if (!p)
    608			return NULL;
    609		p4d_populate(&init_mm, p4d, p);
    610	}
    611	return p4d;
    612}
    613
    614pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
    615{
    616	pgd_t *pgd = pgd_offset_k(addr);
    617	if (pgd_none(*pgd)) {
    618		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
    619		if (!p)
    620			return NULL;
    621		pgd_populate(&init_mm, pgd, p);
    622	}
    623	return pgd;
    624}
    625
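        /*
         * Populate (or look up) every page table level for a single vmemmap
         * address, top down: pgd -> p4d -> pud -> pmd -> pte. Returns the PTE on
         * success or NULL if any level could not be allocated.
         */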
    626static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
    627					      struct vmem_altmap *altmap,
    628					      struct page *reuse)
    629{
    630	pgd_t *pgd;
    631	p4d_t *p4d;
    632	pud_t *pud;
    633	pmd_t *pmd;
    634	pte_t *pte;
    635
    636	pgd = vmemmap_pgd_populate(addr, node);
    637	if (!pgd)
    638		return NULL;
    639	p4d = vmemmap_p4d_populate(pgd, addr, node);
    640	if (!p4d)
    641		return NULL;
    642	pud = vmemmap_pud_populate(p4d, addr, node);
    643	if (!pud)
    644		return NULL;
    645	pmd = vmemmap_pmd_populate(pud, addr, node);
    646	if (!pmd)
    647		return NULL;
    648	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
    649	if (!pte)
    650		return NULL;
    651	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
    652
    653	return pte;
    654}
    655
    656static int __meminit vmemmap_populate_range(unsigned long start,
    657					    unsigned long end, int node,
    658					    struct vmem_altmap *altmap,
    659					    struct page *reuse)
    660{
    661	unsigned long addr = start;
    662	pte_t *pte;
    663
    664	for (; addr < end; addr += PAGE_SIZE) {
    665		pte = vmemmap_populate_address(addr, node, altmap, reuse);
    666		if (!pte)
    667			return -ENOMEM;
    668	}
    669
    670	return 0;
    671}
    672
    673int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
    674					 int node, struct vmem_altmap *altmap)
    675{
    676	return vmemmap_populate_range(start, end, node, altmap, NULL);
    677}
    678
    679/*
    680 * For compound pages bigger than section size (e.g. x86 1G compound
     681 * pages with 2M subsection size) fill the remaining sections with tail
     682 * pages.
    683 *
     684 * Note that memremap_pages() resets the @nr_range value and will increment
     685 * it after each successful range onlining. Thus the value of @nr_range
     686 * at section memmap populate time corresponds to the in-progress range
    687 * being onlined here.
    688 */
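        /*
         * Return true when the section being populated covers only tail pages of
         * a compound page that started in an earlier section, in which case the
         * already-populated tail vmemmap page of the previous section can be
         * reused.
         */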
    689static bool __meminit reuse_compound_section(unsigned long start_pfn,
    690					     struct dev_pagemap *pgmap)
    691{
    692	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
    693	unsigned long offset = start_pfn -
    694		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);
    695
    696	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
    697}
    698
    699static pte_t * __meminit compound_section_tail_page(unsigned long addr)
    700{
    701	pte_t *pte;
    702
    703	addr -= PAGE_SIZE;
    704
    705	/*
    706	 * Assuming sections are populated sequentially, the previous section's
    707	 * page data can be reused.
    708	 */
    709	pte = pte_offset_kernel(pmd_off_k(addr), addr);
    710	if (!pte)
    711		return NULL;
    712
    713	return pte;
    714}
    715
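        /*
         * Worked example (assuming 4K base pages and a 64-byte struct page): a 2M
         * compound device page has 512 struct pages, i.e. 512 * 64 = 32K or eight
         * vmemmap pages. The loop below populates one vmemmap page for the head
         * struct page and one for the first tail struct pages, then maps the
         * remaining six vmemmap pages to that same tail page.
         */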
    716static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
    717						     unsigned long start,
    718						     unsigned long end, int node,
    719						     struct dev_pagemap *pgmap)
    720{
    721	unsigned long size, addr;
    722	pte_t *pte;
    723	int rc;
    724
    725	if (reuse_compound_section(start_pfn, pgmap)) {
    726		pte = compound_section_tail_page(start);
    727		if (!pte)
    728			return -ENOMEM;
    729
    730		/*
    731		 * Reuse the page that was populated in the prior iteration
    732		 * with just tail struct pages.
    733		 */
    734		return vmemmap_populate_range(start, end, node, NULL,
    735					      pte_page(*pte));
    736	}
    737
    738	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
    739	for (addr = start; addr < end; addr += size) {
    740		unsigned long next = addr, last = addr + size;
    741
     742		/* Populate the vmemmap page holding the head struct page */
    743		pte = vmemmap_populate_address(addr, node, NULL, NULL);
    744		if (!pte)
    745			return -ENOMEM;
    746
     747		/* Populate the first vmemmap page holding only tail struct pages */
    748		next = addr + PAGE_SIZE;
    749		pte = vmemmap_populate_address(next, node, NULL, NULL);
    750		if (!pte)
    751			return -ENOMEM;
    752
    753		/*
    754		 * Reuse the previous page for the rest of tail pages
    755		 * See layout diagram in Documentation/vm/vmemmap_dedup.rst
    756		 */
    757		next += PAGE_SIZE;
    758		rc = vmemmap_populate_range(next, last, node, NULL,
    759					    pte_page(*pte));
    760		if (rc)
    761			return -ENOMEM;
    762	}
    763
    764	return 0;
    765}
    766
    767struct page * __meminit __populate_section_memmap(unsigned long pfn,
    768		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
    769		struct dev_pagemap *pgmap)
    770{
    771	unsigned long start = (unsigned long) pfn_to_page(pfn);
    772	unsigned long end = start + nr_pages * sizeof(struct page);
    773	int r;
    774
    775	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
    776		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
    777		return NULL;
    778
    779	if (is_power_of_2(sizeof(struct page)) &&
    780	    pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap)
    781		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
    782	else
    783		r = vmemmap_populate(start, end, nid, altmap);
    784
    785	if (r < 0)
    786		return NULL;
    787
    788	return pfn_to_page(pfn);
    789}