cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pgtable.c (30577B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>


#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(9, 2)
#define KVM_MAX_OWNER_ID		1

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

#define KVM_PHYS_INVALID (-1ULL)

static bool kvm_phys_is_valid(u64 phys)
{
	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	if (!kvm_level_supports_block_mapping(level))
		return false;

	if (granule > (end - addr))
		return false;

	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
		return false;

	return IS_ALIGNED(addr, granule);
}

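/* Index of the entry covering data->addr within a table at the given level. */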
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

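/*
 * Index of the page within a (potentially concatenated) PGD that holds the
 * start-level entry mapping @addr.
 */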
static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

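/*
 * Encode a physical address in a PTE. With 64KiB pages, bits [51:48] of the
 * PA live in a dedicated PTE field rather than in the low address bits.
 */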
static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
{
	return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
}

static void kvm_clear_pte(kvm_pte_t *ptep)
{
	WRITE_ONCE(*ptep, 0);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
			      struct kvm_pgtable_mm_ops *mm_ops)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
{
	kvm_pte_t pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	return pte;
}

static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
{
	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

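/*
 * Visit a single entry: run the TABLE_PRE callback for table entries and the
 * LEAF callback for leaf entries (which may turn the entry into a table),
 * then recurse into child tables and finish with TABLE_POST on the way out.
 */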
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte, data->pgt->mm_ops);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

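/*
 * Walk the page-table covering [addr, addr + size), invoking the walker
 * callbacks selected by walker->flags. The range is expanded to page
 * granularity before walking.
 */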
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}

struct leaf_walk_data {
	kvm_pte_t	pte;
	u32		level;
};

static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
		       enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct leaf_walk_data *data = arg;

	data->pte   = *ptep;
	data->level = level;

	return 0;
}

int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, u32 *level)
{
	struct leaf_walk_data data;
	struct kvm_pgtable_walker walker = {
		.cb	= leaf_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &data,
	};
	int ret;

	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
			       PAGE_SIZE, &walker);
	if (!ret) {
		if (ptep)
			*ptep  = data.pte;
		if (level)
			*level = data.level;
	}

	return ret;
}

struct hyp_map_data {
	u64				phys;
	kvm_pte_t			attr;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

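/*
 * Build the hyp stage-1 attribute bits for @prot. Read access is mandatory,
 * and writable or device mappings are never allowed to be executable.
 */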
static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
	*ptep = attr;

	return 0;
}

enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
	u32 ap;

	if (!kvm_pte_valid(pte))
		return prot;

	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
		prot |= KVM_PGTABLE_PROT_X;

	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
		prot |= KVM_PGTABLE_PROT_R;
	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
		prot |= KVM_PGTABLE_PROT_RW;

	return prot;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	data->phys += granule;
	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (old == new)
		return true;
	if (!kvm_pte_valid(old))
		data->mm_ops->get_page(ptep);
	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
		return false;

	smp_store_release(ptep, new);
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;
	struct hyp_map_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp, mm_ops);
	mm_ops->get_page(ptep);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_set_prot_attr(prot, &map_data.attr);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

struct hyp_unmap_data {
	u64				unmapped;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			    enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t pte = *ptep, *childp = NULL;
	u64 granule = kvm_granule_size(level);
	struct hyp_unmap_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return -EINVAL;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte, mm_ops);

		if (mm_ops->page_count(childp) != 1)
			return 0;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
	} else {
		if (end - addr < granule)
			return -EINVAL;

		kvm_clear_pte(ptep);
		dsb(ishst);
		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
		data->unmapped += granule;
	}

	dsb(ish);
	isb();
	mm_ops->put_page(ptep);

	if (childp)
		mm_ops->put_page(childp);

	return 0;
}

u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct hyp_unmap_data unmap_data = {
		.mm_ops	= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_unmap_walker,
		.arg	= &unmap_data,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	if (!pgt->mm_ops->page_count)
		return 0;

	kvm_pgtable_walk(pgt, addr, size, &walker);
	return unmap_data.unmapped;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
			 struct kvm_pgtable_mm_ops *mm_ops)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mm_ops		= mm_ops;
	pgt->mmu		= NULL;
	pgt->force_pte_cb	= NULL;

	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct kvm_pgtable_mm_ops *mm_ops = arg;
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	mm_ops->put_page(ptep);

	if (kvm_pte_table(pte, level))
		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));

	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= pgt->mm_ops,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgt->mm_ops->put_page(pgt->pgd);
	pgt->pgd = NULL;
}

struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;
	u8				owner_id;

	kvm_pte_t			*anchor;
	kvm_pte_t			*childp;

	struct kvm_s2_mmu		*mmu;
	void				*memcache;

	struct kvm_pgtable_mm_ops	*mm_ops;

	/* Force mappings to page granularity */
	bool				force_pte;
};

u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
{
	u64 vtcr = VTCR_EL2_FLAGS;
	u8 lvls;

	vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
	vtcr |= VTCR_EL2_T0SZ(phys_shift);
	/*
	 * Use a minimum 2 level page table to prevent splitting
	 * host PMD huge pages at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

	/*
	 * Enable Hardware Access Flag management, unconditionally on all
	 * CPUs. The feature is RES0 on CPUs without the support and must
	 * be ignored by those CPUs.
	 */
	vtcr |= VTCR_EL2_HA;

	/* Set the vmid bits */
	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
		VTCR_EL2_VS_16BIT :
		VTCR_EL2_VS_8BIT;

	return vtcr;
}

static bool stage2_has_fwb(struct kvm_pgtable *pgt)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return false;

	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

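/*
 * Build the stage-2 attribute bits for @prot. Executable device mappings are
 * rejected; everything else maps directly onto the S2AP/XN/MemAttr fields.
 */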
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
				kvm_pte_t *ptep)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
			    KVM_S2_MEMATTR(pgt, NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
	*ptep = attr;

	return 0;
}

enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;

	if (!kvm_pte_valid(pte))
		return prot;

	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
		prot |= KVM_PGTABLE_PROT_R;
	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
		prot |= KVM_PGTABLE_PROT_W;
	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
		prot |= KVM_PGTABLE_PROT_X;

	return prot;
}

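/*
 * A valid mapping only needs to be replaced if something other than the
 * permission bits (S2AP/XN) would change.
 */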
static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
{
	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
		return true;

	return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
}

static bool stage2_pte_is_counted(kvm_pte_t pte)
{
	/*
	 * The refcount tracks valid entries as well as invalid entries if they
	 * encode ownership of a page by an entity other than the page-table
	 * owner, whose id is 0.
	 */
	return !!pte;
}

static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
			   u32 level, struct kvm_pgtable_mm_ops *mm_ops)
{
	/*
	 * Clear the existing PTE, and perform break-before-make with
	 * TLB maintenance if it was valid.
	 */
	if (kvm_pte_valid(*ptep)) {
		kvm_clear_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	}

	mm_ops->put_page(ptep);
}

static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}

static bool stage2_pte_executable(kvm_pte_t pte)
{
	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}

static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
					struct stage2_map_data *data)
{
	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
		return false;

	return kvm_block_mapping_supported(addr, end, data->phys, level);
}

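/*
 * Try to install a block/page mapping at this level. Returns -E2BIG if the
 * walker must descend to a smaller granule, and -EAGAIN if the existing
 * mapping already covers the request (modulo permissions).
 */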
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;
	struct kvm_pgtable *pgt = data->mmu->pgt;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
		return -E2BIG;

	if (kvm_phys_is_valid(phys))
		new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	else
		new = kvm_init_invalid_leaf_owner(data->owner_id);

	if (stage2_pte_is_counted(old)) {
		/*
		 * Skip updating the PTE if we are trying to recreate the exact
		 * same mapping or only change the access permissions. Instead,
		 * the vCPU will exit one more time from guest if still needed
		 * and then go through the path of relaxing permissions.
		 */
		if (!stage2_pte_needs_update(old, new))
			return -EAGAIN;

		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
	}

	/* Perform CMOs before installation of the guest stage-2 PTE */
	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
						granule);

	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);

	smp_store_release(ptep, new);
	if (stage2_pte_is_counted(new))
		mm_ops->get_page(ptep);
	if (kvm_phys_is_valid(phys))
		data->phys += granule;
	return 0;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
		return 0;

	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
	kvm_clear_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	kvm_pte_t *childp, pte = *ptep;
	int ret;

	if (data->anchor) {
		if (stage2_pte_is_counted(pte))
			mm_ops->put_page(ptep);

		return 0;
	}

	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
	if (ret != -E2BIG)
		return ret;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = mm_ops->zalloc_page(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (stage2_pte_is_counted(pte))
		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);

	kvm_set_table_pte(ptep, childp, mm_ops);
	mm_ops->get_page(ptep);

	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
	kvm_pte_t *childp;
	int ret = 0;

	if (!data->anchor)
		return 0;

	if (data->anchor == ptep) {
		childp = data->childp;
		data->anchor = NULL;
		data->childp = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	} else {
		childp = kvm_pte_follow(*ptep, mm_ops);
	}

	mm_ops->put_page(childp);
	mm_ops->put_page(ptep);

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
		.mm_ops		= pgt->mm_ops,
		.force_pte	= pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
		return -EINVAL;

	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
				 void *mc, u8 owner_id)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= KVM_PHYS_INVALID,
		.mmu		= pgt->mmu,
		.memcache	= mc,
		.mm_ops		= pgt->mm_ops,
		.owner_id	= owner_id,
		.force_pte	= true,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	if (owner_id > KVM_MAX_OWNER_ID)
		return -EINVAL;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	return ret;
}

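/*
 * Tear down a mapping: drop references on leaves and on child tables that are
 * no longer referenced, performing CMOs for cacheable pages when FWB is not
 * available.
 */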
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_s2_mmu *mmu = pgt->mmu;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte)) {
		if (stage2_pte_is_counted(pte)) {
			kvm_clear_pte(ptep);
			mm_ops->put_page(ptep);
		}
		return 0;
	}

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte, mm_ops);

		if (mm_ops->page_count(childp) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pgt, pte)) {
		need_flush = !stage2_has_fwb(pgt);
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	stage2_put_pte(ptep, mmu, addr, level, mm_ops);

	if (need_flush && mm_ops->dcache_clean_inval_poc)
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
					       kvm_granule_size(level));

	if (childp)
		mm_ops->put_page(childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t			attr_set;
	kvm_pte_t			attr_clr;
	kvm_pte_t			pte;
	u32				level;
	struct kvm_pgtable_mm_ops	*mm_ops;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;
	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte) {
		/*
		 * Invalidate instruction cache before updating the guest
		 * stage-2 PTE if we are going to add executable permission.
		 */
		if (mm_ops->icache_inval_pou &&
		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
						  kvm_granule_size(level));
		WRITE_ONCE(*ptep, pte);
	}

	return 0;
}

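/*
 * Set and/or clear attribute bits on every valid leaf in [addr, addr + size),
 * optionally returning the last PTE value seen and its level.
 */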
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
		.mm_ops		= pgt->mm_ops,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_pgtable *pgt = arg;
	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
		return 0;

	if (mm_ops->dcache_clean_inval_poc)
		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
					       kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= pgt,
	};

	if (stage2_has_fwb(pgt))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

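/*
 * Size and allocate the (possibly concatenated) stage-2 PGD according to the
 * VTCR value already computed for this MMU, then initialise the pgtable.
 */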
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			      struct kvm_pgtable_mm_ops *mm_ops,
			      enum kvm_pgtable_stage2_flags flags,
			      kvm_pgtable_force_pte_cb_t force_pte_cb)
{
	size_t pgd_sz;
	u64 vtcr = mmu->arch->vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= ia_bits;
	pgt->start_level	= start_level;
	pgt->mm_ops		= mm_ops;
	pgt->mmu		= mmu;
	pgt->flags		= flags;
	pgt->force_pte_cb	= force_pte_cb;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	struct kvm_pgtable_mm_ops *mm_ops = arg;
	kvm_pte_t pte = *ptep;

	if (!stage2_pte_is_counted(pte))
		return 0;

	mm_ops->put_page(ptep);

	if (kvm_pte_table(pte, level))
		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
		.arg	= pgt->mm_ops,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}