cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gtt.c (78418B)


      1/*
      2 * GTT virtualization
      3 *
      4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a
      7 * copy of this software and associated documentation files (the "Software"),
      8 * to deal in the Software without restriction, including without limitation
      9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10 * and/or sell copies of the Software, and to permit persons to whom the
     11 * Software is furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice (including the next
     14 * paragraph) shall be included in all copies or substantial portions of the
     15 * Software.
     16 *
     17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23 * SOFTWARE.
     24 *
     25 * Authors:
     26 *    Zhi Wang <zhi.a.wang@intel.com>
     27 *    Zhenyu Wang <zhenyuw@linux.intel.com>
     28 *    Xiao Zheng <xiao.zheng@intel.com>
     29 *
     30 * Contributors:
     31 *    Min He <min.he@intel.com>
     32 *    Bing Niu <bing.niu@intel.com>
     33 *
     34 */
     35
     36#include "i915_drv.h"
     37#include "gvt.h"
     38#include "i915_pvinfo.h"
     39#include "trace.h"
     40
     41#include "gt/intel_gt_regs.h"
     42
     43#if defined(VERBOSE_DEBUG)
     44#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
     45#else
     46#define gvt_vdbg_mm(fmt, args...)
     47#endif
     48
     49static bool enable_out_of_sync = false;
     50static int preallocated_oos_pages = 8192;
     51
     52static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
     53{
     54	struct kvm *kvm = vgpu->vfio_device.kvm;
     55	int idx;
     56	bool ret;
     57
     58	if (!vgpu->attached)
     59		return false;
     60
     61	idx = srcu_read_lock(&kvm->srcu);
     62	ret = kvm_is_visible_gfn(kvm, gfn);
     63	srcu_read_unlock(&kvm->srcu, idx);
     64
     65	return ret;
     66}
     67
     68/*
      69 * Validate a gm address and the related range size, i.e. check
      70 * that the range falls within the vGPU's aperture or hidden GM space.
     71 */
     72bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
     73{
     74	if (size == 0)
     75		return vgpu_gmadr_is_valid(vgpu, addr);
     76
     77	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
     78	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
     79		return true;
     80	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
     81		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
     82		return true;
     83
     84	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
     85		     addr, size);
     86	return false;
     87}
     88
     89/* translate a guest gmadr to host gmadr */
     90int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
     91{
     92	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
     93
     94	if (drm_WARN(&i915->drm, !vgpu_gmadr_is_valid(vgpu, g_addr),
     95		     "invalid guest gmadr %llx\n", g_addr))
     96		return -EACCES;
     97
     98	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
     99		*h_addr = vgpu_aperture_gmadr_base(vgpu)
    100			  + (g_addr - vgpu_aperture_offset(vgpu));
    101	else
    102		*h_addr = vgpu_hidden_gmadr_base(vgpu)
    103			  + (g_addr - vgpu_hidden_offset(vgpu));
    104	return 0;
    105}
    106
    107/* translate a host gmadr to guest gmadr */
    108int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
    109{
    110	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
    111
    112	if (drm_WARN(&i915->drm, !gvt_gmadr_is_valid(vgpu->gvt, h_addr),
    113		     "invalid host gmadr %llx\n", h_addr))
    114		return -EACCES;
    115
    116	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
    117		*g_addr = vgpu_aperture_gmadr_base(vgpu)
    118			+ (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
    119	else
    120		*g_addr = vgpu_hidden_gmadr_base(vgpu)
    121			+ (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
    122	return 0;
    123}
    124
    125int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
    126			     unsigned long *h_index)
    127{
    128	u64 h_addr;
    129	int ret;
    130
    131	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
    132				       &h_addr);
    133	if (ret)
    134		return ret;
    135
    136	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
    137	return 0;
    138}
    139
    140int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
    141			     unsigned long *g_index)
    142{
    143	u64 g_addr;
    144	int ret;
    145
    146	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
    147				       &g_addr);
    148	if (ret)
    149		return ret;
    150
    151	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
    152	return 0;
    153}
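
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * use the index translation helper above.  example_ggtt_index_g2h() is a
 * hypothetical name; the real call sites live elsewhere in GVT.
 */
#if 0
static int example_ggtt_index_g2h(struct intel_vgpu *vgpu,
				  unsigned long g_index)
{
	unsigned long h_index;
	int ret;

	/* Fails with -EACCES if g_index lies outside the vGPU's GM space. */
	ret = intel_gvt_ggtt_index_g2h(vgpu, g_index, &h_index);
	if (ret)
		return ret;

	/* h_index now names the same page in the host GGTT. */
	return 0;
}
#endif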
    154
    155#define gtt_type_is_entry(type) \
    156	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
    157	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
    158	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
    159
    160#define gtt_type_is_pt(type) \
    161	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
    162
    163#define gtt_type_is_pte_pt(type) \
    164	(type == GTT_TYPE_PPGTT_PTE_PT)
    165
    166#define gtt_type_is_root_pointer(type) \
    167	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
    168
    169#define gtt_init_entry(e, t, p, v) do { \
    170	(e)->type = t; \
    171	(e)->pdev = p; \
    172	memcpy(&(e)->val64, &v, sizeof(v)); \
    173} while (0)
    174
    175/*
    176 * Mappings between GTT_TYPE* enumerations.
     177 * The following information can be looked up for a given type:
    178 * - type of next level page table
    179 * - type of entry inside this level page table
    180 * - type of entry with PSE set
    181 *
     182 * If the given type doesn't carry that particular piece of
     183 * information, e.g. an l4 root entry is asked for its PSE type, or
     184 * a PTE page table is asked for its next level page table type,
     185 * then GTT_TYPE_INVALID is returned, since an l4 root entry has no
     186 * PSE bit and a PTE page table has no next level page table. This
     187 * is useful when traversing a page table, because the walk can
     188 * simply stop when it reaches an invalid type.
    189 */
    190
    191struct gtt_type_table_entry {
    192	int entry_type;
    193	int pt_type;
    194	int next_pt_type;
    195	int pse_entry_type;
    196};
    197
    198#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
    199	[type] = { \
    200		.entry_type = e_type, \
    201		.pt_type = cpt_type, \
    202		.next_pt_type = npt_type, \
    203		.pse_entry_type = pse_type, \
    204	}
    205
    206static const struct gtt_type_table_entry gtt_type_table[] = {
    207	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
    208			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
    209			GTT_TYPE_INVALID,
    210			GTT_TYPE_PPGTT_PML4_PT,
    211			GTT_TYPE_INVALID),
    212	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
    213			GTT_TYPE_PPGTT_PML4_ENTRY,
    214			GTT_TYPE_PPGTT_PML4_PT,
    215			GTT_TYPE_PPGTT_PDP_PT,
    216			GTT_TYPE_INVALID),
    217	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
    218			GTT_TYPE_PPGTT_PML4_ENTRY,
    219			GTT_TYPE_PPGTT_PML4_PT,
    220			GTT_TYPE_PPGTT_PDP_PT,
    221			GTT_TYPE_INVALID),
    222	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
    223			GTT_TYPE_PPGTT_PDP_ENTRY,
    224			GTT_TYPE_PPGTT_PDP_PT,
    225			GTT_TYPE_PPGTT_PDE_PT,
    226			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
    227	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
    228			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
    229			GTT_TYPE_INVALID,
    230			GTT_TYPE_PPGTT_PDE_PT,
    231			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
    232	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
    233			GTT_TYPE_PPGTT_PDP_ENTRY,
    234			GTT_TYPE_PPGTT_PDP_PT,
    235			GTT_TYPE_PPGTT_PDE_PT,
    236			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
    237	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
    238			GTT_TYPE_PPGTT_PDE_ENTRY,
    239			GTT_TYPE_PPGTT_PDE_PT,
    240			GTT_TYPE_PPGTT_PTE_PT,
    241			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
    242	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
    243			GTT_TYPE_PPGTT_PDE_ENTRY,
    244			GTT_TYPE_PPGTT_PDE_PT,
    245			GTT_TYPE_PPGTT_PTE_PT,
    246			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
     247	/* We treat the IPS bit as 'PSE' for the PTE level. */
    248	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
    249			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
    250			GTT_TYPE_PPGTT_PTE_PT,
    251			GTT_TYPE_INVALID,
    252			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
    253	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
    254			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
    255			GTT_TYPE_PPGTT_PTE_PT,
    256			GTT_TYPE_INVALID,
    257			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
    258	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
    259			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
    260			GTT_TYPE_PPGTT_PTE_PT,
    261			GTT_TYPE_INVALID,
    262			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
    263	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
    264			GTT_TYPE_PPGTT_PDE_ENTRY,
    265			GTT_TYPE_PPGTT_PDE_PT,
    266			GTT_TYPE_INVALID,
    267			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
    268	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
    269			GTT_TYPE_PPGTT_PDP_ENTRY,
    270			GTT_TYPE_PPGTT_PDP_PT,
    271			GTT_TYPE_INVALID,
    272			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
    273	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
    274			GTT_TYPE_GGTT_PTE,
    275			GTT_TYPE_INVALID,
    276			GTT_TYPE_INVALID,
    277			GTT_TYPE_INVALID),
    278};
    279
    280static inline int get_next_pt_type(int type)
    281{
    282	return gtt_type_table[type].next_pt_type;
    283}
    284
    285static inline int get_pt_type(int type)
    286{
    287	return gtt_type_table[type].pt_type;
    288}
    289
    290static inline int get_entry_type(int type)
    291{
    292	return gtt_type_table[type].entry_type;
    293}
    294
    295static inline int get_pse_type(int type)
    296{
    297	return gtt_type_table[type].pse_entry_type;
    298}
    299
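/*
 * Illustrative example, not part of the original file: how the lookup
 * helpers above drive a page table walk.  For a PDE page table
 * (GTT_TYPE_PPGTT_PDE_PT), the table gives:
 *
 *   get_entry_type()   -> GTT_TYPE_PPGTT_PDE_ENTRY    (entries it contains)
 *   get_next_pt_type() -> GTT_TYPE_PPGTT_PTE_PT       (table a PDE points to)
 *   get_pse_type()     -> GTT_TYPE_PPGTT_PTE_2M_ENTRY (entry with PSE set)
 *
 * A PTE page table has no next level, so get_next_pt_type() returns
 * GTT_TYPE_INVALID there and the walk stops.
 */
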
    300static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
    301{
    302	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
    303
    304	return readq(addr);
    305}
    306
    307static void ggtt_invalidate(struct intel_gt *gt)
    308{
    309	mmio_hw_access_pre(gt);
    310	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
    311	mmio_hw_access_post(gt);
    312}
    313
    314static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
    315{
    316	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
    317
    318	writeq(pte, addr);
    319}
    320
    321static inline int gtt_get_entry64(void *pt,
    322		struct intel_gvt_gtt_entry *e,
    323		unsigned long index, bool hypervisor_access, unsigned long gpa,
    324		struct intel_vgpu *vgpu)
    325{
    326	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
    327	int ret;
    328
    329	if (WARN_ON(info->gtt_entry_size != 8))
    330		return -EINVAL;
    331
    332	if (hypervisor_access) {
    333		ret = intel_gvt_read_gpa(vgpu, gpa +
    334				(index << info->gtt_entry_size_shift),
    335				&e->val64, 8);
    336		if (WARN_ON(ret))
    337			return ret;
    338	} else if (!pt) {
    339		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
    340	} else {
    341		e->val64 = *((u64 *)pt + index);
    342	}
    343	return 0;
    344}
    345
    346static inline int gtt_set_entry64(void *pt,
    347		struct intel_gvt_gtt_entry *e,
    348		unsigned long index, bool hypervisor_access, unsigned long gpa,
    349		struct intel_vgpu *vgpu)
    350{
    351	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
    352	int ret;
    353
    354	if (WARN_ON(info->gtt_entry_size != 8))
    355		return -EINVAL;
    356
    357	if (hypervisor_access) {
    358		ret = intel_gvt_write_gpa(vgpu, gpa +
    359				(index << info->gtt_entry_size_shift),
    360				&e->val64, 8);
    361		if (WARN_ON(ret))
    362			return ret;
    363	} else if (!pt) {
    364		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
    365	} else {
    366		*((u64 *)pt + index) = e->val64;
    367	}
    368	return 0;
    369}
    370
    371#define GTT_HAW 46
    372
    373#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
    374#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
    375#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
    376#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)
    377
    378#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
     379#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */
    380
    381#define GTT_64K_PTE_STRIDE 16
    382
    383static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
    384{
    385	unsigned long pfn;
    386
    387	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
    388		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
    389	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
    390		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
    391	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
    392		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
    393	else
    394		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
    395	return pfn;
    396}
    397
    398static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
    399{
    400	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
    401		e->val64 &= ~ADDR_1G_MASK;
    402		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
    403	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
    404		e->val64 &= ~ADDR_2M_MASK;
    405		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
    406	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
    407		e->val64 &= ~ADDR_64K_MASK;
    408		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
    409	} else {
    410		e->val64 &= ~ADDR_4K_MASK;
    411		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
    412	}
    413
    414	e->val64 |= (pfn << PAGE_SHIFT);
    415}
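
/*
 * Worked example, not part of the original file: with GTT_HAW = 46,
 * ADDR_2M_MASK covers address bits [45:21].  For a 2M entry whose val64
 * carries the address 0x140200000:
 *
 *   pfn = (0x140200000 & ADDR_2M_MASK) >> PAGE_SHIFT = 0x140200
 *
 * gen8_gtt_set_pfn() is the inverse: it clears only the mask for the
 * entry's size and ORs in (pfn << PAGE_SHIFT), so the low attribute bits
 * of the entry (present, PSE, PAT, ...) are preserved.
 */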
    416
    417static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
    418{
    419	return !!(e->val64 & _PAGE_PSE);
    420}
    421
    422static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
    423{
    424	if (gen8_gtt_test_pse(e)) {
    425		switch (e->type) {
    426		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
    427			e->val64 &= ~_PAGE_PSE;
    428			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
    429			break;
    430		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
    431			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
    432			e->val64 &= ~_PAGE_PSE;
    433			break;
    434		default:
    435			WARN_ON(1);
    436		}
    437	}
    438}
    439
    440static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
    441{
    442	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
    443		return false;
    444
    445	return !!(e->val64 & GEN8_PDE_IPS_64K);
    446}
    447
    448static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
    449{
    450	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
    451		return;
    452
    453	e->val64 &= ~GEN8_PDE_IPS_64K;
    454}
    455
    456static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
    457{
    458	/*
     459	 * i915 writes PDP root pointer registers without the present bit
     460	 * set, and that still works, so root pointer entries need to be
     461	 * treated specially here.
    462	 */
    463	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
    464			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
    465		return (e->val64 != 0);
    466	else
    467		return (e->val64 & GEN8_PAGE_PRESENT);
    468}
    469
    470static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
    471{
    472	e->val64 &= ~GEN8_PAGE_PRESENT;
    473}
    474
    475static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
    476{
    477	e->val64 |= GEN8_PAGE_PRESENT;
    478}
    479
    480static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
    481{
    482	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
    483}
    484
    485static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
    486{
    487	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
    488}
    489
    490static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
    491{
    492	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
    493}
    494
    495/*
    496 * Per-platform GMA routines.
    497 */
    498static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
    499{
    500	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
    501
    502	trace_gma_index(__func__, gma, x);
    503	return x;
    504}
    505
    506#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
    507static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
    508{ \
    509	unsigned long x = (exp); \
    510	trace_gma_index(__func__, gma, x); \
    511	return x; \
    512}
    513
    514DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
    515DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
    516DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
    517DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
    518DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
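
/*
 * Worked example, not part of the original file: for a 4-level PPGTT and
 * gma = 0x8080604000, the macros above decompose the address as
 *
 *   gen8_gma_to_pml4_index()   = (gma >> 39) & 0x1ff = 1
 *   gen8_gma_to_l4_pdp_index() = (gma >> 30) & 0x1ff = 2
 *   gen8_gma_to_pde_index()    = (gma >> 21) & 0x1ff = 3
 *   gen8_gma_to_pte_index()    = (gma >> 12) & 0x1ff = 4
 *
 * i.e. PML4E#1 -> PDPE#2 -> PDE#3 -> PTE#4, with the low 12 bits left as
 * the offset into the 4K page.
 */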
    519
    520static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
    521	.get_entry = gtt_get_entry64,
    522	.set_entry = gtt_set_entry64,
    523	.clear_present = gtt_entry_clear_present,
    524	.set_present = gtt_entry_set_present,
    525	.test_present = gen8_gtt_test_present,
    526	.test_pse = gen8_gtt_test_pse,
    527	.clear_pse = gen8_gtt_clear_pse,
    528	.clear_ips = gen8_gtt_clear_ips,
    529	.test_ips = gen8_gtt_test_ips,
    530	.clear_64k_splited = gen8_gtt_clear_64k_splited,
    531	.set_64k_splited = gen8_gtt_set_64k_splited,
    532	.test_64k_splited = gen8_gtt_test_64k_splited,
    533	.get_pfn = gen8_gtt_get_pfn,
    534	.set_pfn = gen8_gtt_set_pfn,
    535};
    536
    537static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
    538	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
    539	.gma_to_pte_index = gen8_gma_to_pte_index,
    540	.gma_to_pde_index = gen8_gma_to_pde_index,
    541	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
    542	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
    543	.gma_to_pml4_index = gen8_gma_to_pml4_index,
    544};
    545
    546/* Update entry type per pse and ips bit. */
    547static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
    548	struct intel_gvt_gtt_entry *entry, bool ips)
    549{
    550	switch (entry->type) {
    551	case GTT_TYPE_PPGTT_PDE_ENTRY:
    552	case GTT_TYPE_PPGTT_PDP_ENTRY:
    553		if (pte_ops->test_pse(entry))
    554			entry->type = get_pse_type(entry->type);
    555		break;
    556	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
    557		if (ips)
    558			entry->type = get_pse_type(entry->type);
    559		break;
    560	default:
    561		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
    562	}
    563
    564	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
    565}
    566
    567/*
    568 * MM helpers.
    569 */
    570static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
    571		struct intel_gvt_gtt_entry *entry, unsigned long index,
    572		bool guest)
    573{
    574	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
    575
    576	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
    577
    578	entry->type = mm->ppgtt_mm.root_entry_type;
    579	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
    580			   mm->ppgtt_mm.shadow_pdps,
    581			   entry, index, false, 0, mm->vgpu);
    582	update_entry_type_for_real(pte_ops, entry, false);
    583}
    584
    585static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
    586		struct intel_gvt_gtt_entry *entry, unsigned long index)
    587{
    588	_ppgtt_get_root_entry(mm, entry, index, true);
    589}
    590
    591static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
    592		struct intel_gvt_gtt_entry *entry, unsigned long index)
    593{
    594	_ppgtt_get_root_entry(mm, entry, index, false);
    595}
    596
    597static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
    598		struct intel_gvt_gtt_entry *entry, unsigned long index,
    599		bool guest)
    600{
    601	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
    602
    603	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
    604			   mm->ppgtt_mm.shadow_pdps,
    605			   entry, index, false, 0, mm->vgpu);
    606}
    607
    608static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
    609		struct intel_gvt_gtt_entry *entry, unsigned long index)
    610{
    611	_ppgtt_set_root_entry(mm, entry, index, false);
    612}
    613
    614static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
    615		struct intel_gvt_gtt_entry *entry, unsigned long index)
    616{
    617	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
    618
    619	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
    620
    621	entry->type = GTT_TYPE_GGTT_PTE;
    622	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
    623			   false, 0, mm->vgpu);
    624}
    625
    626static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
    627		struct intel_gvt_gtt_entry *entry, unsigned long index)
    628{
    629	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
    630
    631	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
    632
    633	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
    634			   false, 0, mm->vgpu);
    635}
    636
    637static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
    638		struct intel_gvt_gtt_entry *entry, unsigned long index)
    639{
    640	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
    641
    642	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
    643
    644	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
    645}
    646
    647static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
    648		struct intel_gvt_gtt_entry *entry, unsigned long index)
    649{
    650	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
    651	unsigned long offset = index;
    652
    653	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
    654
    655	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
    656		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
    657		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
    658	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
    659		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
    660		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
    661	}
    662
    663	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
    664}
    665
    666/*
    667 * PPGTT shadow page table helpers.
    668 */
    669static inline int ppgtt_spt_get_entry(
    670		struct intel_vgpu_ppgtt_spt *spt,
    671		void *page_table, int type,
    672		struct intel_gvt_gtt_entry *e, unsigned long index,
    673		bool guest)
    674{
    675	struct intel_gvt *gvt = spt->vgpu->gvt;
    676	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
    677	int ret;
    678
    679	e->type = get_entry_type(type);
    680
    681	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
    682		return -EINVAL;
    683
    684	ret = ops->get_entry(page_table, e, index, guest,
    685			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
    686			spt->vgpu);
    687	if (ret)
    688		return ret;
    689
    690	update_entry_type_for_real(ops, e, guest ?
    691				   spt->guest_page.pde_ips : false);
    692
    693	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
    694		    type, e->type, index, e->val64);
    695	return 0;
    696}
    697
    698static inline int ppgtt_spt_set_entry(
    699		struct intel_vgpu_ppgtt_spt *spt,
    700		void *page_table, int type,
    701		struct intel_gvt_gtt_entry *e, unsigned long index,
    702		bool guest)
    703{
    704	struct intel_gvt *gvt = spt->vgpu->gvt;
    705	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
    706
    707	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
    708		return -EINVAL;
    709
    710	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
    711		    type, e->type, index, e->val64);
    712
    713	return ops->set_entry(page_table, e, index, guest,
    714			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
    715			spt->vgpu);
    716}
    717
    718#define ppgtt_get_guest_entry(spt, e, index) \
    719	ppgtt_spt_get_entry(spt, NULL, \
    720		spt->guest_page.type, e, index, true)
    721
    722#define ppgtt_set_guest_entry(spt, e, index) \
    723	ppgtt_spt_set_entry(spt, NULL, \
    724		spt->guest_page.type, e, index, true)
    725
    726#define ppgtt_get_shadow_entry(spt, e, index) \
    727	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
    728		spt->shadow_page.type, e, index, false)
    729
    730#define ppgtt_set_shadow_entry(spt, e, index) \
    731	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
    732		spt->shadow_page.type, e, index, false)
    733
    734static void *alloc_spt(gfp_t gfp_mask)
    735{
    736	struct intel_vgpu_ppgtt_spt *spt;
    737
    738	spt = kzalloc(sizeof(*spt), gfp_mask);
    739	if (!spt)
    740		return NULL;
    741
    742	spt->shadow_page.page = alloc_page(gfp_mask);
    743	if (!spt->shadow_page.page) {
    744		kfree(spt);
    745		return NULL;
    746	}
    747	return spt;
    748}
    749
    750static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
    751{
    752	__free_page(spt->shadow_page.page);
    753	kfree(spt);
    754}
    755
    756static int detach_oos_page(struct intel_vgpu *vgpu,
    757		struct intel_vgpu_oos_page *oos_page);
    758
    759static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
    760{
    761	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;
    762
    763	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
    764
    765	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
    766		       DMA_BIDIRECTIONAL);
    767
    768	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
    769
    770	if (spt->guest_page.gfn) {
    771		if (spt->guest_page.oos_page)
    772			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
    773
    774		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
    775	}
    776
    777	list_del_init(&spt->post_shadow_list);
    778	free_spt(spt);
    779}
    780
    781static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
    782{
    783	struct intel_vgpu_ppgtt_spt *spt, *spn;
    784	struct radix_tree_iter iter;
    785	LIST_HEAD(all_spt);
    786	void __rcu **slot;
    787
    788	rcu_read_lock();
    789	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
    790		spt = radix_tree_deref_slot(slot);
    791		list_move(&spt->post_shadow_list, &all_spt);
    792	}
    793	rcu_read_unlock();
    794
    795	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
    796		ppgtt_free_spt(spt);
    797}
    798
    799static int ppgtt_handle_guest_write_page_table_bytes(
    800		struct intel_vgpu_ppgtt_spt *spt,
    801		u64 pa, void *p_data, int bytes);
    802
    803static int ppgtt_write_protection_handler(
    804		struct intel_vgpu_page_track *page_track,
    805		u64 gpa, void *data, int bytes)
    806{
    807	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
    808
    809	int ret;
    810
    811	if (bytes != 4 && bytes != 8)
    812		return -EINVAL;
    813
    814	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
    815	if (ret)
    816		return ret;
    817	return ret;
    818}
    819
    820/* Find a spt by guest gfn. */
    821static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
    822		struct intel_vgpu *vgpu, unsigned long gfn)
    823{
    824	struct intel_vgpu_page_track *track;
    825
    826	track = intel_vgpu_find_page_track(vgpu, gfn);
    827	if (track && track->handler == ppgtt_write_protection_handler)
    828		return track->priv_data;
    829
    830	return NULL;
    831}
    832
    833/* Find the spt by shadow page mfn. */
    834static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
    835		struct intel_vgpu *vgpu, unsigned long mfn)
    836{
    837	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
    838}
    839
    840static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
    841
    842/* Allocate shadow page table without guest page. */
    843static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
    844		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
    845{
    846	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
    847	struct intel_vgpu_ppgtt_spt *spt = NULL;
    848	dma_addr_t daddr;
    849	int ret;
    850
    851retry:
    852	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
    853	if (!spt) {
    854		if (reclaim_one_ppgtt_mm(vgpu->gvt))
    855			goto retry;
    856
    857		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
    858		return ERR_PTR(-ENOMEM);
    859	}
    860
    861	spt->vgpu = vgpu;
    862	atomic_set(&spt->refcount, 1);
    863	INIT_LIST_HEAD(&spt->post_shadow_list);
    864
    865	/*
    866	 * Init shadow_page.
    867	 */
    868	spt->shadow_page.type = type;
    869	daddr = dma_map_page(kdev, spt->shadow_page.page,
    870			     0, 4096, DMA_BIDIRECTIONAL);
    871	if (dma_mapping_error(kdev, daddr)) {
    872		gvt_vgpu_err("fail to map dma addr\n");
    873		ret = -EINVAL;
    874		goto err_free_spt;
    875	}
    876	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
    877	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
    878
    879	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
    880	if (ret)
    881		goto err_unmap_dma;
    882
    883	return spt;
    884
    885err_unmap_dma:
    886	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
    887err_free_spt:
    888	free_spt(spt);
    889	return ERR_PTR(ret);
    890}
    891
    892/* Allocate shadow page table associated with specific gfn. */
    893static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
    894		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
    895		unsigned long gfn, bool guest_pde_ips)
    896{
    897	struct intel_vgpu_ppgtt_spt *spt;
    898	int ret;
    899
    900	spt = ppgtt_alloc_spt(vgpu, type);
    901	if (IS_ERR(spt))
    902		return spt;
    903
    904	/*
    905	 * Init guest_page.
    906	 */
    907	ret = intel_vgpu_register_page_track(vgpu, gfn,
    908			ppgtt_write_protection_handler, spt);
    909	if (ret) {
    910		ppgtt_free_spt(spt);
    911		return ERR_PTR(ret);
    912	}
    913
    914	spt->guest_page.type = type;
    915	spt->guest_page.gfn = gfn;
    916	spt->guest_page.pde_ips = guest_pde_ips;
    917
    918	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
    919
    920	return spt;
    921}
    922
    923#define pt_entry_size_shift(spt) \
    924	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
    925
    926#define pt_entries(spt) \
    927	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
    928
    929#define for_each_present_guest_entry(spt, e, i) \
    930	for (i = 0; i < pt_entries(spt); \
    931	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
    932		if (!ppgtt_get_guest_entry(spt, e, i) && \
    933		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
    934
    935#define for_each_present_shadow_entry(spt, e, i) \
    936	for (i = 0; i < pt_entries(spt); \
    937	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
    938		if (!ppgtt_get_shadow_entry(spt, e, i) && \
    939		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
    940
    941#define for_each_shadow_entry(spt, e, i) \
    942	for (i = 0; i < pt_entries(spt); \
    943	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
    944		if (!ppgtt_get_shadow_entry(spt, e, i))
    945
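/*
 * Illustrative usage, not part of the original file: the iterators above
 * expand to a for-loop whose body is guarded by an if, so the statement
 * that follows only runs for entries that are actually present, e.g.:
 *
 *	struct intel_gvt_gtt_entry e;
 *	unsigned long i;
 *
 *	for_each_present_shadow_entry(spt, &e, i)
 *		gvt_vdbg_mm("entry %lu = %llx\n", i, e.val64);
 *
 * The step becomes GTT_64K_PTE_STRIDE (16) when the page table's pde_ips
 * flag is set (64K PTEs), so only PTE#0, PTE#16, PTE#32, ... are visited.
 */
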
    946static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
    947{
    948	int v = atomic_read(&spt->refcount);
    949
    950	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
    951	atomic_inc(&spt->refcount);
    952}
    953
    954static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
    955{
    956	int v = atomic_read(&spt->refcount);
    957
    958	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
    959	return atomic_dec_return(&spt->refcount);
    960}
    961
    962static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
    963
    964static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
    965		struct intel_gvt_gtt_entry *e)
    966{
    967	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
    968	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
    969	struct intel_vgpu_ppgtt_spt *s;
    970	enum intel_gvt_gtt_type cur_pt_type;
    971
    972	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
    973
    974	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
    975		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
    976		cur_pt_type = get_next_pt_type(e->type);
    977
    978		if (!gtt_type_is_pt(cur_pt_type) ||
    979				!gtt_type_is_pt(cur_pt_type + 1)) {
    980			drm_WARN(&i915->drm, 1,
    981				 "Invalid page table type, cur_pt_type is: %d\n",
    982				 cur_pt_type);
    983			return -EINVAL;
    984		}
    985
    986		cur_pt_type += 1;
    987
    988		if (ops->get_pfn(e) ==
    989			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
    990			return 0;
    991	}
    992	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
    993	if (!s) {
    994		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
    995				ops->get_pfn(e));
    996		return -ENXIO;
    997	}
    998	return ppgtt_invalidate_spt(s);
    999}
   1000
   1001static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
   1002		struct intel_gvt_gtt_entry *entry)
   1003{
   1004	struct intel_vgpu *vgpu = spt->vgpu;
   1005	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1006	unsigned long pfn;
   1007	int type;
   1008
   1009	pfn = ops->get_pfn(entry);
   1010	type = spt->shadow_page.type;
   1011
   1012	/* Uninitialized spte or unshadowed spte. */
   1013	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
   1014		return;
   1015
   1016	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
   1017}
   1018
   1019static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
   1020{
   1021	struct intel_vgpu *vgpu = spt->vgpu;
   1022	struct intel_gvt_gtt_entry e;
   1023	unsigned long index;
   1024	int ret;
   1025
   1026	trace_spt_change(spt->vgpu->id, "die", spt,
   1027			spt->guest_page.gfn, spt->shadow_page.type);
   1028
   1029	if (ppgtt_put_spt(spt) > 0)
   1030		return 0;
   1031
   1032	for_each_present_shadow_entry(spt, &e, index) {
   1033		switch (e.type) {
   1034		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
   1035			gvt_vdbg_mm("invalidate 4K entry\n");
   1036			ppgtt_invalidate_pte(spt, &e);
   1037			break;
   1038		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
    1039			/* We don't set up 64K shadow entries so far. */
   1040			WARN(1, "suspicious 64K gtt entry\n");
   1041			continue;
   1042		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
   1043			gvt_vdbg_mm("invalidate 2M entry\n");
   1044			continue;
   1045		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
   1046			WARN(1, "GVT doesn't support 1GB page\n");
   1047			continue;
   1048		case GTT_TYPE_PPGTT_PML4_ENTRY:
   1049		case GTT_TYPE_PPGTT_PDP_ENTRY:
   1050		case GTT_TYPE_PPGTT_PDE_ENTRY:
    1051			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
   1052			ret = ppgtt_invalidate_spt_by_shadow_entry(
   1053					spt->vgpu, &e);
   1054			if (ret)
   1055				goto fail;
   1056			break;
   1057		default:
   1058			GEM_BUG_ON(1);
   1059		}
   1060	}
   1061
   1062	trace_spt_change(spt->vgpu->id, "release", spt,
   1063			 spt->guest_page.gfn, spt->shadow_page.type);
   1064	ppgtt_free_spt(spt);
   1065	return 0;
   1066fail:
   1067	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
   1068			spt, e.val64, e.type);
   1069	return ret;
   1070}
   1071
   1072static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
   1073{
   1074	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
   1075
   1076	if (GRAPHICS_VER(dev_priv) == 9) {
   1077		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
   1078			GAMW_ECO_ENABLE_64K_IPS_FIELD;
   1079
   1080		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
   1081	} else if (GRAPHICS_VER(dev_priv) >= 11) {
    1082		/* 64K paging is now controlled only by the IPS bit in the PTE. */
   1083		return true;
   1084	} else
   1085		return false;
   1086}
   1087
   1088static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
   1089
   1090static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
   1091		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
   1092{
   1093	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1094	struct intel_vgpu_ppgtt_spt *spt = NULL;
   1095	bool ips = false;
   1096	int ret;
   1097
   1098	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
   1099
   1100	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
   1101		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
   1102
   1103	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
   1104	if (spt) {
   1105		ppgtt_get_spt(spt);
   1106
   1107		if (ips != spt->guest_page.pde_ips) {
   1108			spt->guest_page.pde_ips = ips;
   1109
   1110			gvt_dbg_mm("reshadow PDE since ips changed\n");
   1111			clear_page(spt->shadow_page.vaddr);
   1112			ret = ppgtt_populate_spt(spt);
   1113			if (ret) {
   1114				ppgtt_put_spt(spt);
   1115				goto err;
   1116			}
   1117		}
   1118	} else {
   1119		int type = get_next_pt_type(we->type);
   1120
   1121		if (!gtt_type_is_pt(type)) {
   1122			ret = -EINVAL;
   1123			goto err;
   1124		}
   1125
   1126		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
   1127		if (IS_ERR(spt)) {
   1128			ret = PTR_ERR(spt);
   1129			goto err;
   1130		}
   1131
   1132		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
   1133		if (ret)
   1134			goto err_free_spt;
   1135
   1136		ret = ppgtt_populate_spt(spt);
   1137		if (ret)
   1138			goto err_free_spt;
   1139
   1140		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
   1141				 spt->shadow_page.type);
   1142	}
   1143	return spt;
   1144
   1145err_free_spt:
   1146	ppgtt_free_spt(spt);
   1147	spt = NULL;
   1148err:
   1149	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
   1150		     spt, we->val64, we->type);
   1151	return ERR_PTR(ret);
   1152}
   1153
   1154static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
   1155		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
   1156{
   1157	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
   1158
   1159	se->type = ge->type;
   1160	se->val64 = ge->val64;
   1161
    1162	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
   1163	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
   1164		ops->clear_ips(se);
   1165
   1166	ops->set_pfn(se, s->shadow_page.mfn);
   1167}
   1168
   1169/*
    1170 * Check whether 2MB huge page shadowing is possible for this entry
   1171 * @vgpu: target vgpu
   1172 * @entry: target pfn's gtt entry
   1173 *
    1174 * Return 1 if 2MB huge gtt shadowing is possible, 0 if the required
    1175 * conditions are not met, negative error code on failure.
   1176 */
   1177static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
   1178	struct intel_gvt_gtt_entry *entry)
   1179{
   1180	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1181	kvm_pfn_t pfn;
   1182
   1183	if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
   1184		return 0;
   1185
   1186	if (!vgpu->attached)
   1187		return -EINVAL;
   1188	pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
   1189	if (is_error_noslot_pfn(pfn))
   1190		return -EINVAL;
   1191	return PageTransHuge(pfn_to_page(pfn));
   1192}
   1193
   1194static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
   1195	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
   1196	struct intel_gvt_gtt_entry *se)
   1197{
   1198	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1199	struct intel_vgpu_ppgtt_spt *sub_spt;
   1200	struct intel_gvt_gtt_entry sub_se;
   1201	unsigned long start_gfn;
   1202	dma_addr_t dma_addr;
   1203	unsigned long sub_index;
   1204	int ret;
   1205
   1206	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
   1207
   1208	start_gfn = ops->get_pfn(se);
   1209
   1210	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
   1211	if (IS_ERR(sub_spt))
   1212		return PTR_ERR(sub_spt);
   1213
   1214	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
   1215		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
   1216						   PAGE_SIZE, &dma_addr);
   1217		if (ret) {
   1218			ppgtt_invalidate_spt(spt);
   1219			return ret;
   1220		}
   1221		sub_se.val64 = se->val64;
   1222
   1223		/* Copy the PAT field from PDE. */
   1224		sub_se.val64 &= ~_PAGE_PAT;
   1225		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
   1226
   1227		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
   1228		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
   1229	}
   1230
   1231	/* Clear dirty field. */
   1232	se->val64 &= ~_PAGE_DIRTY;
   1233
   1234	ops->clear_pse(se);
   1235	ops->clear_ips(se);
   1236	ops->set_pfn(se, sub_spt->shadow_page.mfn);
   1237	ppgtt_set_shadow_entry(spt, se, index);
   1238	return 0;
   1239}
   1240
   1241static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
   1242	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
   1243	struct intel_gvt_gtt_entry *se)
   1244{
   1245	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1246	struct intel_gvt_gtt_entry entry = *se;
   1247	unsigned long start_gfn;
   1248	dma_addr_t dma_addr;
   1249	int i, ret;
   1250
   1251	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
   1252
   1253	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
   1254
   1255	start_gfn = ops->get_pfn(se);
   1256
   1257	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
   1258	ops->set_64k_splited(&entry);
   1259
   1260	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
   1261		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
   1262						   PAGE_SIZE, &dma_addr);
   1263		if (ret)
   1264			return ret;
   1265
   1266		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
   1267		ppgtt_set_shadow_entry(spt, &entry, index + i);
   1268	}
   1269	return 0;
   1270}
   1271
   1272static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
   1273	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
   1274	struct intel_gvt_gtt_entry *ge)
   1275{
   1276	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
   1277	struct intel_gvt_gtt_entry se = *ge;
   1278	unsigned long gfn, page_size = PAGE_SIZE;
   1279	dma_addr_t dma_addr;
   1280	int ret;
   1281
   1282	if (!pte_ops->test_present(ge))
   1283		return 0;
   1284
   1285	gfn = pte_ops->get_pfn(ge);
   1286
   1287	switch (ge->type) {
   1288	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
   1289		gvt_vdbg_mm("shadow 4K gtt entry\n");
   1290		break;
   1291	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
   1292		gvt_vdbg_mm("shadow 64K gtt entry\n");
   1293		/*
    1294		 * The layout of a 64K page is special: the page size is
    1295		 * controlled by the upper PDE. To keep it simple, we always
    1296		 * split a 64K page into smaller 4K pages in the shadow PT.
   1297		 */
   1298		return split_64KB_gtt_entry(vgpu, spt, index, &se);
   1299	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
   1300		gvt_vdbg_mm("shadow 2M gtt entry\n");
   1301		ret = is_2MB_gtt_possible(vgpu, ge);
   1302		if (ret == 0)
   1303			return split_2MB_gtt_entry(vgpu, spt, index, &se);
   1304		else if (ret < 0)
   1305			return ret;
   1306		page_size = I915_GTT_PAGE_SIZE_2M;
   1307		break;
   1308	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
   1309		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
   1310		return -EINVAL;
   1311	default:
   1312		GEM_BUG_ON(1);
   1313	}
   1314
   1315	/* direct shadow */
   1316	ret = intel_gvt_dma_map_guest_page(vgpu, gfn, page_size, &dma_addr);
   1317	if (ret)
   1318		return -ENXIO;
   1319
   1320	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
   1321	ppgtt_set_shadow_entry(spt, &se, index);
   1322	return 0;
   1323}
   1324
   1325static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
   1326{
   1327	struct intel_vgpu *vgpu = spt->vgpu;
   1328	struct intel_gvt *gvt = vgpu->gvt;
   1329	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
   1330	struct intel_vgpu_ppgtt_spt *s;
   1331	struct intel_gvt_gtt_entry se, ge;
   1332	unsigned long gfn, i;
   1333	int ret;
   1334
   1335	trace_spt_change(spt->vgpu->id, "born", spt,
   1336			 spt->guest_page.gfn, spt->shadow_page.type);
   1337
   1338	for_each_present_guest_entry(spt, &ge, i) {
   1339		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
   1340			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
   1341			if (IS_ERR(s)) {
   1342				ret = PTR_ERR(s);
   1343				goto fail;
   1344			}
   1345			ppgtt_get_shadow_entry(spt, &se, i);
   1346			ppgtt_generate_shadow_entry(&se, s, &ge);
   1347			ppgtt_set_shadow_entry(spt, &se, i);
   1348		} else {
   1349			gfn = ops->get_pfn(&ge);
   1350			if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
   1351				ops->set_pfn(&se, gvt->gtt.scratch_mfn);
   1352				ppgtt_set_shadow_entry(spt, &se, i);
   1353				continue;
   1354			}
   1355
   1356			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
   1357			if (ret)
   1358				goto fail;
   1359		}
   1360	}
   1361	return 0;
   1362fail:
   1363	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
   1364			spt, ge.val64, ge.type);
   1365	return ret;
   1366}
   1367
   1368static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
   1369		struct intel_gvt_gtt_entry *se, unsigned long index)
   1370{
   1371	struct intel_vgpu *vgpu = spt->vgpu;
   1372	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1373	int ret;
   1374
   1375	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
   1376			       spt->shadow_page.type, se->val64, index);
   1377
   1378	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
   1379		    se->type, index, se->val64);
   1380
   1381	if (!ops->test_present(se))
   1382		return 0;
   1383
   1384	if (ops->get_pfn(se) ==
   1385	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
   1386		return 0;
   1387
   1388	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
   1389		struct intel_vgpu_ppgtt_spt *s =
   1390			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
   1391		if (!s) {
   1392			gvt_vgpu_err("fail to find guest page\n");
   1393			ret = -ENXIO;
   1394			goto fail;
   1395		}
   1396		ret = ppgtt_invalidate_spt(s);
   1397		if (ret)
   1398			goto fail;
   1399	} else {
    1400		/* We don't set up 64K shadow entries so far. */
   1401		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
   1402		     "suspicious 64K entry\n");
   1403		ppgtt_invalidate_pte(spt, se);
   1404	}
   1405
   1406	return 0;
   1407fail:
   1408	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
   1409			spt, se->val64, se->type);
   1410	return ret;
   1411}
   1412
   1413static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
   1414		struct intel_gvt_gtt_entry *we, unsigned long index)
   1415{
   1416	struct intel_vgpu *vgpu = spt->vgpu;
   1417	struct intel_gvt_gtt_entry m;
   1418	struct intel_vgpu_ppgtt_spt *s;
   1419	int ret;
   1420
   1421	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
   1422			       we->val64, index);
   1423
   1424	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
   1425		    we->type, index, we->val64);
   1426
   1427	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
   1428		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
   1429		if (IS_ERR(s)) {
   1430			ret = PTR_ERR(s);
   1431			goto fail;
   1432		}
   1433		ppgtt_get_shadow_entry(spt, &m, index);
   1434		ppgtt_generate_shadow_entry(&m, s, we);
   1435		ppgtt_set_shadow_entry(spt, &m, index);
   1436	} else {
   1437		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
   1438		if (ret)
   1439			goto fail;
   1440	}
   1441	return 0;
   1442fail:
   1443	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
   1444		spt, we->val64, we->type);
   1445	return ret;
   1446}
   1447
   1448static int sync_oos_page(struct intel_vgpu *vgpu,
   1449		struct intel_vgpu_oos_page *oos_page)
   1450{
   1451	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
   1452	struct intel_gvt *gvt = vgpu->gvt;
   1453	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
   1454	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
   1455	struct intel_gvt_gtt_entry old, new;
   1456	int index;
   1457	int ret;
   1458
   1459	trace_oos_change(vgpu->id, "sync", oos_page->id,
   1460			 spt, spt->guest_page.type);
   1461
   1462	old.type = new.type = get_entry_type(spt->guest_page.type);
   1463	old.val64 = new.val64 = 0;
   1464
   1465	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
   1466				info->gtt_entry_size_shift); index++) {
   1467		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
   1468		ops->get_entry(NULL, &new, index, true,
   1469			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);
   1470
   1471		if (old.val64 == new.val64
   1472			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
   1473			continue;
   1474
   1475		trace_oos_sync(vgpu->id, oos_page->id,
   1476				spt, spt->guest_page.type,
   1477				new.val64, index);
   1478
   1479		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
   1480		if (ret)
   1481			return ret;
   1482
   1483		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
   1484	}
   1485
   1486	spt->guest_page.write_cnt = 0;
   1487	list_del_init(&spt->post_shadow_list);
   1488	return 0;
   1489}
   1490
   1491static int detach_oos_page(struct intel_vgpu *vgpu,
   1492		struct intel_vgpu_oos_page *oos_page)
   1493{
   1494	struct intel_gvt *gvt = vgpu->gvt;
   1495	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
   1496
   1497	trace_oos_change(vgpu->id, "detach", oos_page->id,
   1498			 spt, spt->guest_page.type);
   1499
   1500	spt->guest_page.write_cnt = 0;
   1501	spt->guest_page.oos_page = NULL;
   1502	oos_page->spt = NULL;
   1503
   1504	list_del_init(&oos_page->vm_list);
   1505	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
   1506
   1507	return 0;
   1508}
   1509
   1510static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
   1511		struct intel_vgpu_ppgtt_spt *spt)
   1512{
   1513	struct intel_gvt *gvt = spt->vgpu->gvt;
   1514	int ret;
   1515
   1516	ret = intel_gvt_read_gpa(spt->vgpu,
   1517			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
   1518			oos_page->mem, I915_GTT_PAGE_SIZE);
   1519	if (ret)
   1520		return ret;
   1521
   1522	oos_page->spt = spt;
   1523	spt->guest_page.oos_page = oos_page;
   1524
   1525	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
   1526
   1527	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
   1528			 spt, spt->guest_page.type);
   1529	return 0;
   1530}
   1531
   1532static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
   1533{
   1534	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
   1535	int ret;
   1536
   1537	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
   1538	if (ret)
   1539		return ret;
   1540
   1541	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
   1542			 spt, spt->guest_page.type);
   1543
   1544	list_del_init(&oos_page->vm_list);
   1545	return sync_oos_page(spt->vgpu, oos_page);
   1546}
   1547
   1548static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
   1549{
   1550	struct intel_gvt *gvt = spt->vgpu->gvt;
   1551	struct intel_gvt_gtt *gtt = &gvt->gtt;
   1552	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
   1553	int ret;
   1554
    1555	WARN(oos_page, "shadow PPGTT page already has an oos page\n");
   1556
   1557	if (list_empty(&gtt->oos_page_free_list_head)) {
   1558		oos_page = container_of(gtt->oos_page_use_list_head.next,
   1559			struct intel_vgpu_oos_page, list);
   1560		ret = ppgtt_set_guest_page_sync(oos_page->spt);
   1561		if (ret)
   1562			return ret;
   1563		ret = detach_oos_page(spt->vgpu, oos_page);
   1564		if (ret)
   1565			return ret;
   1566	} else
   1567		oos_page = container_of(gtt->oos_page_free_list_head.next,
   1568			struct intel_vgpu_oos_page, list);
   1569	return attach_oos_page(oos_page, spt);
   1570}
   1571
   1572static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
   1573{
   1574	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
   1575
    1576	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
   1577		return -EINVAL;
   1578
   1579	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
   1580			 spt, spt->guest_page.type);
   1581
   1582	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
   1583	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
   1584}
   1585
   1586/**
    1587 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
   1588 * @vgpu: a vGPU
   1589 *
    1590 * This function is called before submitting a guest workload to the host,
    1591 * to sync all the out-of-sync shadow pages for the vGPU.
   1592 *
   1593 * Returns:
   1594 * Zero on success, negative error code if failed.
   1595 */
   1596int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
   1597{
   1598	struct list_head *pos, *n;
   1599	struct intel_vgpu_oos_page *oos_page;
   1600	int ret;
   1601
   1602	if (!enable_out_of_sync)
   1603		return 0;
   1604
   1605	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
   1606		oos_page = container_of(pos,
   1607				struct intel_vgpu_oos_page, vm_list);
   1608		ret = ppgtt_set_guest_page_sync(oos_page->spt);
   1609		if (ret)
   1610			return ret;
   1611	}
   1612	return 0;
   1613}
   1614
   1615/*
   1616 * The heart of PPGTT shadow page table.
   1617 */
   1618static int ppgtt_handle_guest_write_page_table(
   1619		struct intel_vgpu_ppgtt_spt *spt,
   1620		struct intel_gvt_gtt_entry *we, unsigned long index)
   1621{
   1622	struct intel_vgpu *vgpu = spt->vgpu;
   1623	int type = spt->shadow_page.type;
   1624	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1625	struct intel_gvt_gtt_entry old_se;
   1626	int new_present;
   1627	int i, ret;
   1628
   1629	new_present = ops->test_present(we);
   1630
   1631	/*
    1632	 * Add the new entry first and then remove the old one. That
    1633	 * guarantees the ppgtt table stays valid during the window
    1634	 * between the addition and the removal.
   1635	 */
   1636	ppgtt_get_shadow_entry(spt, &old_se, index);
   1637
   1638	if (new_present) {
   1639		ret = ppgtt_handle_guest_entry_add(spt, we, index);
   1640		if (ret)
   1641			goto fail;
   1642	}
   1643
   1644	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
   1645	if (ret)
   1646		goto fail;
   1647
   1648	if (!new_present) {
    1649		/* For 64KB split entries, we need to clear them all. */
   1650		if (ops->test_64k_splited(&old_se) &&
   1651		    !(index % GTT_64K_PTE_STRIDE)) {
    1652			gvt_vdbg_mm("remove split 64K shadow entries\n");
   1653			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
   1654				ops->clear_64k_splited(&old_se);
   1655				ops->set_pfn(&old_se,
   1656					vgpu->gtt.scratch_pt[type].page_mfn);
   1657				ppgtt_set_shadow_entry(spt, &old_se, index + i);
   1658			}
   1659		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
   1660			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
   1661			ops->clear_pse(&old_se);
   1662			ops->set_pfn(&old_se,
   1663				     vgpu->gtt.scratch_pt[type].page_mfn);
   1664			ppgtt_set_shadow_entry(spt, &old_se, index);
   1665		} else {
   1666			ops->set_pfn(&old_se,
   1667				     vgpu->gtt.scratch_pt[type].page_mfn);
   1668			ppgtt_set_shadow_entry(spt, &old_se, index);
   1669		}
   1670	}
   1671
   1672	return 0;
   1673fail:
   1674	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
   1675			spt, we->val64, we->type);
   1676	return ret;
   1677}
   1678
   1679
   1680
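       /*
        * Out-of-sync tracking is only applied to leaf PTE pages that the guest
        * has written at least twice (write_cnt >= 2); higher-level page tables
        * are always kept in sync.
        */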
   1681static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
   1682{
   1683	return enable_out_of_sync
   1684		&& gtt_type_is_pte_pt(spt->guest_page.type)
   1685		&& spt->guest_page.write_cnt >= 2;
   1686}
   1687
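       /*
        * Queue entry @index of @spt for deferred (post) shadowing; the pending
        * entries are applied by intel_vgpu_flush_post_shadow() before the next
        * workload is submitted.
        */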
   1688static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
   1689		unsigned long index)
   1690{
   1691	set_bit(index, spt->post_shadow_bitmap);
   1692	if (!list_empty(&spt->post_shadow_list))
   1693		return;
   1694
   1695	list_add_tail(&spt->post_shadow_list,
   1696			&spt->vgpu->gtt.post_shadow_list_head);
   1697}
   1698
   1699/**
   1700 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
   1701 * @vgpu: a vGPU
   1702 *
   1703 * This function is called before submitting a guest workload to host,
   1704 * to flush all the post shadows for a vGPU.
   1705 *
   1706 * Returns:
   1707 * Zero on success, negative error code if failed.
   1708 */
   1709int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
   1710{
   1711	struct list_head *pos, *n;
   1712	struct intel_vgpu_ppgtt_spt *spt;
   1713	struct intel_gvt_gtt_entry ge;
   1714	unsigned long index;
   1715	int ret;
   1716
   1717	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
   1718		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
   1719				post_shadow_list);
   1720
   1721		for_each_set_bit(index, spt->post_shadow_bitmap,
   1722				GTT_ENTRY_NUM_IN_ONE_PAGE) {
   1723			ppgtt_get_guest_entry(spt, &ge, index);
   1724
   1725			ret = ppgtt_handle_guest_write_page_table(spt,
   1726							&ge, index);
   1727			if (ret)
   1728				return ret;
   1729			clear_bit(index, spt->post_shadow_bitmap);
   1730		}
   1731		list_del_init(&spt->post_shadow_list);
   1732	}
   1733	return 0;
   1734}
   1735
   1736static int ppgtt_handle_guest_write_page_table_bytes(
   1737		struct intel_vgpu_ppgtt_spt *spt,
   1738		u64 pa, void *p_data, int bytes)
   1739{
   1740	struct intel_vgpu *vgpu = spt->vgpu;
   1741	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   1742	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
   1743	struct intel_gvt_gtt_entry we, se;
   1744	unsigned long index;
   1745	int ret;
   1746
   1747	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
   1748
   1749	ppgtt_get_guest_entry(spt, &we, index);
   1750
   1751	/*
   1752	 * For a page table with 64K gtt entries, only PTE#0, PTE#16,
   1753	 * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should
   1754	 * be ignored.
   1755	 */
   1756	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
   1757	    (index % GTT_64K_PTE_STRIDE)) {
   1758		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
   1759			    index);
   1760		return 0;
   1761	}
   1762
   1763	if (bytes == info->gtt_entry_size) {
   1764		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
   1765		if (ret)
   1766			return ret;
   1767	} else {
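       		/*
       		 * Partial (sub-entry) write: drop the old shadow mapping and
       		 * point the shadow entry at the scratch page for now; the real
       		 * shadow update is deferred to the post-shadow flush, once the
       		 * whole guest entry has been written.
       		 */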
   1768		if (!test_bit(index, spt->post_shadow_bitmap)) {
   1769			int type = spt->shadow_page.type;
   1770
   1771			ppgtt_get_shadow_entry(spt, &se, index);
   1772			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
   1773			if (ret)
   1774				return ret;
   1775			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
   1776			ppgtt_set_shadow_entry(spt, &se, index);
   1777		}
   1778		ppgtt_set_post_shadow(spt, index);
   1779	}
   1780
   1781	if (!enable_out_of_sync)
   1782		return 0;
   1783
   1784	spt->guest_page.write_cnt++;
   1785
   1786	if (spt->guest_page.oos_page)
   1787		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
   1788				false, 0, vgpu);
   1789
   1790	if (can_do_out_of_sync(spt)) {
   1791		if (!spt->guest_page.oos_page)
   1792			ppgtt_allocate_oos_page(spt);
   1793
   1794		ret = ppgtt_set_guest_page_oos(spt);
   1795		if (ret < 0)
   1796			return ret;
   1797	}
   1798	return 0;
   1799}
   1800
   1801static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
   1802{
   1803	struct intel_vgpu *vgpu = mm->vgpu;
   1804	struct intel_gvt *gvt = vgpu->gvt;
   1805	struct intel_gvt_gtt *gtt = &gvt->gtt;
   1806	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
   1807	struct intel_gvt_gtt_entry se;
   1808	int index;
   1809
   1810	if (!mm->ppgtt_mm.shadowed)
   1811		return;
   1812
   1813	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
   1814		ppgtt_get_shadow_root_entry(mm, &se, index);
   1815
   1816		if (!ops->test_present(&se))
   1817			continue;
   1818
   1819		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
   1820		se.val64 = 0;
   1821		ppgtt_set_shadow_root_entry(mm, &se, index);
   1822
   1823		trace_spt_guest_change(vgpu->id, "destroy root pointer",
   1824				       NULL, se.type, se.val64, index);
   1825	}
   1826
   1827	mm->ppgtt_mm.shadowed = false;
   1828}
   1829
   1830
   1831static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
   1832{
   1833	struct intel_vgpu *vgpu = mm->vgpu;
   1834	struct intel_gvt *gvt = vgpu->gvt;
   1835	struct intel_gvt_gtt *gtt = &gvt->gtt;
   1836	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
   1837	struct intel_vgpu_ppgtt_spt *spt;
   1838	struct intel_gvt_gtt_entry ge, se;
   1839	int index, ret;
   1840
   1841	if (mm->ppgtt_mm.shadowed)
   1842		return 0;
   1843
   1844	mm->ppgtt_mm.shadowed = true;
   1845
   1846	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
   1847		ppgtt_get_guest_root_entry(mm, &ge, index);
   1848
   1849		if (!ops->test_present(&ge))
   1850			continue;
   1851
   1852		trace_spt_guest_change(vgpu->id, __func__, NULL,
   1853				       ge.type, ge.val64, index);
   1854
   1855		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
   1856		if (IS_ERR(spt)) {
   1857			gvt_vgpu_err("fail to populate guest root pointer\n");
   1858			ret = PTR_ERR(spt);
   1859			goto fail;
   1860		}
   1861		ppgtt_generate_shadow_entry(&se, spt, &ge);
   1862		ppgtt_set_shadow_root_entry(mm, &se, index);
   1863
   1864		trace_spt_guest_change(vgpu->id, "populate root pointer",
   1865				       NULL, se.type, se.val64, index);
   1866	}
   1867
   1868	return 0;
   1869fail:
   1870	invalidate_ppgtt_mm(mm);
   1871	return ret;
   1872}
   1873
   1874static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
   1875{
   1876	struct intel_vgpu_mm *mm;
   1877
   1878	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
   1879	if (!mm)
   1880		return NULL;
   1881
   1882	mm->vgpu = vgpu;
   1883	kref_init(&mm->ref);
   1884	atomic_set(&mm->pincount, 0);
   1885
   1886	return mm;
   1887}
   1888
   1889static void vgpu_free_mm(struct intel_vgpu_mm *mm)
   1890{
   1891	kfree(mm);
   1892}
   1893
   1894/**
   1895 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
   1896 * @vgpu: a vGPU
   1897 * @root_entry_type: ppgtt root entry type
   1898 * @pdps: guest pdps.
   1899 *
   1900 * This function is used to create a ppgtt mm object for a vGPU.
   1901 *
   1902 * Returns:
   1903 * The new mm object on success, an ERR_PTR-encoded error code if failed.
   1904 */
   1905struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
   1906		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
   1907{
   1908	struct intel_gvt *gvt = vgpu->gvt;
   1909	struct intel_vgpu_mm *mm;
   1910	int ret;
   1911
   1912	mm = vgpu_alloc_mm(vgpu);
   1913	if (!mm)
   1914		return ERR_PTR(-ENOMEM);
   1915
   1916	mm->type = INTEL_GVT_MM_PPGTT;
   1917
   1918	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
   1919		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
   1920	mm->ppgtt_mm.root_entry_type = root_entry_type;
   1921
   1922	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
   1923	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
   1924	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
   1925
   1926	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
   1927		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
   1928	else
   1929		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
   1930		       sizeof(mm->ppgtt_mm.guest_pdps));
   1931
   1932	ret = shadow_ppgtt_mm(mm);
   1933	if (ret) {
   1934		gvt_vgpu_err("failed to shadow ppgtt mm\n");
   1935		vgpu_free_mm(mm);
   1936		return ERR_PTR(ret);
   1937	}
   1938
   1939	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
   1940
   1941	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
   1942	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
   1943	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
   1944
   1945	return mm;
   1946}
   1947
   1948static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
   1949{
   1950	struct intel_vgpu_mm *mm;
   1951	unsigned long nr_entries;
   1952
   1953	mm = vgpu_alloc_mm(vgpu);
   1954	if (!mm)
   1955		return ERR_PTR(-ENOMEM);
   1956
   1957	mm->type = INTEL_GVT_MM_GGTT;
   1958
   1959	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
   1960	mm->ggtt_mm.virtual_ggtt =
   1961		vzalloc(array_size(nr_entries,
   1962				   vgpu->gvt->device_info.gtt_entry_size));
   1963	if (!mm->ggtt_mm.virtual_ggtt) {
   1964		vgpu_free_mm(mm);
   1965		return ERR_PTR(-ENOMEM);
   1966	}
   1967
   1968	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
   1969	if (!mm->ggtt_mm.host_ggtt_aperture) {
   1970		vfree(mm->ggtt_mm.virtual_ggtt);
   1971		vgpu_free_mm(mm);
   1972		return ERR_PTR(-ENOMEM);
   1973	}
   1974
   1975	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
   1976	if (!mm->ggtt_mm.host_ggtt_hidden) {
   1977		vfree(mm->ggtt_mm.host_ggtt_aperture);
   1978		vfree(mm->ggtt_mm.virtual_ggtt);
   1979		vgpu_free_mm(mm);
   1980		return ERR_PTR(-ENOMEM);
   1981	}
   1982
   1983	return mm;
   1984}
   1985
   1986/**
   1987 * _intel_vgpu_mm_release - destroy a mm object
   1988 * @mm_ref: a kref object
   1989 *
   1990 * This function is used to destroy a mm object for vGPU
   1991 *
   1992 */
   1993void _intel_vgpu_mm_release(struct kref *mm_ref)
   1994{
   1995	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
   1996
   1997	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
   1998		gvt_err("vgpu mm pin count bug detected\n");
   1999
   2000	if (mm->type == INTEL_GVT_MM_PPGTT) {
   2001		list_del(&mm->ppgtt_mm.list);
   2002
   2003		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
   2004		list_del(&mm->ppgtt_mm.lru_list);
   2005		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
   2006
   2007		invalidate_ppgtt_mm(mm);
   2008	} else {
   2009		vfree(mm->ggtt_mm.virtual_ggtt);
   2010		vfree(mm->ggtt_mm.host_ggtt_aperture);
   2011		vfree(mm->ggtt_mm.host_ggtt_hidden);
   2012	}
   2013
   2014	vgpu_free_mm(mm);
   2015}
   2016
   2017/**
   2018 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
   2019 * @mm: a vGPU mm object
   2020 *
   2021 * This function is called when a user no longer wants to use a vGPU mm object
   2022 */
   2023void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
   2024{
   2025	atomic_dec_if_positive(&mm->pincount);
   2026}
   2027
   2028/**
   2029 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
   2030 * @mm: target vgpu mm
   2031 *
   2032 * This function is called when a user wants to use a vGPU mm object. If this
   2033 * mm object hasn't been shadowed yet, the shadow will be populated at this
   2034 * time.
   2035 *
   2036 * Returns:
   2037 * Zero on success, negative error code if failed.
   2038 */
   2039int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
   2040{
   2041	int ret;
   2042
   2043	atomic_inc(&mm->pincount);
   2044
   2045	if (mm->type == INTEL_GVT_MM_PPGTT) {
   2046		ret = shadow_ppgtt_mm(mm);
   2047		if (ret)
   2048			return ret;
   2049
   2050		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
   2051		list_move_tail(&mm->ppgtt_mm.lru_list,
   2052			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
   2053		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
   2054	}
   2055
   2056	return 0;
   2057}
   2058
   2059static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
   2060{
   2061	struct intel_vgpu_mm *mm;
   2062	struct list_head *pos, *n;
   2063
   2064	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
   2065
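       	/*
       	 * The LRU list is ordered oldest first (pinning moves an mm to the
       	 * tail), so reclaim the first unpinned shadow PPGTT found.
       	 */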
   2066	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
   2067		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
   2068
   2069		if (atomic_read(&mm->pincount))
   2070			continue;
   2071
   2072		list_del_init(&mm->ppgtt_mm.lru_list);
   2073		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
   2074		invalidate_ppgtt_mm(mm);
   2075		return 1;
   2076	}
   2077	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
   2078	return 0;
   2079}
   2080
   2081/*
   2082 * GMA translation APIs.
   2083 */
   2084static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
   2085		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
   2086{
   2087	struct intel_vgpu *vgpu = mm->vgpu;
   2088	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   2089	struct intel_vgpu_ppgtt_spt *s;
   2090
   2091	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
   2092	if (!s)
   2093		return -ENXIO;
   2094
   2095	if (!guest)
   2096		ppgtt_get_shadow_entry(s, e, index);
   2097	else
   2098		ppgtt_get_guest_entry(s, e, index);
   2099	return 0;
   2100}
   2101
   2102/**
   2103 * intel_vgpu_gma_to_gpa - translate a gma to GPA
   2104 * @mm: mm object. could be a PPGTT or GGTT mm object
   2105 * @gma: graphics memory address in this mm object
   2106 *
   2107 * This function is used to translate a graphics memory address in a specific
   2108 * graphics memory space into a guest physical address.
   2109 *
   2110 * Returns:
   2111 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
   2112 */
   2113unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
   2114{
   2115	struct intel_vgpu *vgpu = mm->vgpu;
   2116	struct intel_gvt *gvt = vgpu->gvt;
   2117	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
   2118	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
   2119	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
   2120	unsigned long gma_index[4];
   2121	struct intel_gvt_gtt_entry e;
   2122	int i, levels = 0;
   2123	int ret;
   2124
   2125	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
   2126		   mm->type != INTEL_GVT_MM_PPGTT);
   2127
   2128	if (mm->type == INTEL_GVT_MM_GGTT) {
   2129		if (!vgpu_gmadr_is_valid(vgpu, gma))
   2130			goto err;
   2131
   2132		ggtt_get_guest_entry(mm, &e,
   2133			gma_ops->gma_to_ggtt_pte_index(gma));
   2134
   2135		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
   2136			+ (gma & ~I915_GTT_PAGE_MASK);
   2137
   2138		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
   2139	} else {
   2140		switch (mm->ppgtt_mm.root_entry_type) {
   2141		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
   2142			ppgtt_get_shadow_root_entry(mm, &e, 0);
   2143
   2144			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
   2145			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
   2146			gma_index[2] = gma_ops->gma_to_pde_index(gma);
   2147			gma_index[3] = gma_ops->gma_to_pte_index(gma);
   2148			levels = 4;
   2149			break;
   2150		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
   2151			ppgtt_get_shadow_root_entry(mm, &e,
   2152					gma_ops->gma_to_l3_pdp_index(gma));
   2153
   2154			gma_index[0] = gma_ops->gma_to_pde_index(gma);
   2155			gma_index[1] = gma_ops->gma_to_pte_index(gma);
   2156			levels = 2;
   2157			break;
   2158		default:
   2159			GEM_BUG_ON(1);
   2160		}
   2161
   2162		/* walk the shadow page table and get gpa from guest entry */
   2163		for (i = 0; i < levels; i++) {
   2164			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
   2165				(i == levels - 1));
   2166			if (ret)
   2167				goto err;
   2168
   2169			if (!pte_ops->test_present(&e)) {
   2170				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
   2171				goto err;
   2172			}
   2173		}
   2174
   2175		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
   2176					(gma & ~I915_GTT_PAGE_MASK);
   2177		trace_gma_translate(vgpu->id, "ppgtt", 0,
   2178				    mm->ppgtt_mm.root_entry_type, gma, gpa);
   2179	}
   2180
   2181	return gpa;
   2182err:
   2183	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
   2184	return INTEL_GVT_INVALID_ADDR;
   2185}
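       /*
        * Illustrative usage sketch (not a call site in this file): a caller that
        * holds an mm object resolves a graphics memory address with
        *
        *	gpa = intel_vgpu_gma_to_gpa(mm, gma);
        *	if (gpa == INTEL_GVT_INVALID_ADDR)
        *		return -EFAULT;
        *
        * where the error code is the caller's choice.
        */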
   2186
   2187static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
   2188	unsigned int off, void *p_data, unsigned int bytes)
   2189{
   2190	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
   2191	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
   2192	unsigned long index = off >> info->gtt_entry_size_shift;
   2193	unsigned long gma;
   2194	struct intel_gvt_gtt_entry e;
   2195
   2196	if (bytes != 4 && bytes != 8)
   2197		return -EINVAL;
   2198
   2199	gma = index << I915_GTT_PAGE_SHIFT;
   2200	if (!intel_gvt_ggtt_validate_range(vgpu,
   2201					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
   2202		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
   2203		memset(p_data, 0, bytes);
   2204		return 0;
   2205	}
   2206
   2207	ggtt_get_guest_entry(ggtt_mm, &e, index);
   2208	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
   2209			bytes);
   2210	return 0;
   2211}
   2212
   2213/**
   2214 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
   2215 * @vgpu: a vGPU
   2216 * @off: register offset
   2217 * @p_data: data will be returned to guest
   2218 * @bytes: data length
   2219 *
   2220 * This function is used to emulate the GTT MMIO register read
   2221 *
   2222 * Returns:
   2223 * Zero on success, error code if failed.
   2224 */
   2225int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
   2226	void *p_data, unsigned int bytes)
   2227{
   2228	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
   2229	int ret;
   2230
   2231	if (bytes != 4 && bytes != 8)
   2232		return -EINVAL;
   2233
   2234	off -= info->gtt_start_offset;
   2235	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
   2236	return ret;
   2237}
   2238
   2239static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
   2240		struct intel_gvt_gtt_entry *entry)
   2241{
   2242	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
   2243	unsigned long pfn;
   2244
   2245	pfn = pte_ops->get_pfn(entry);
   2246	if (pfn != vgpu->gvt->gtt.scratch_mfn)
   2247		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
   2248}
   2249
   2250static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
   2251	void *p_data, unsigned int bytes)
   2252{
   2253	struct intel_gvt *gvt = vgpu->gvt;
   2254	const struct intel_gvt_device_info *info = &gvt->device_info;
   2255	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
   2256	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
   2257	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
   2258	unsigned long gma, gfn;
   2259	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
   2260	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
   2261	dma_addr_t dma_addr;
   2262	int ret;
   2263	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
   2264	bool partial_update = false;
   2265
   2266	if (bytes != 4 && bytes != 8)
   2267		return -EINVAL;
   2268
   2269	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
   2270
   2271	/* the VM may configure the whole GM space when ballooning is used */
   2272	if (!vgpu_gmadr_is_valid(vgpu, gma))
   2273		return 0;
   2274
   2275	e.type = GTT_TYPE_GGTT_PTE;
   2276	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
   2277			bytes);
   2278
   2279	/* If the ggtt entry size is 8 bytes and it is split into two 4-byte
   2280	 * writes, save the first 4 bytes in a list and update only the virtual
   2281	 * PTE. Update the shadow PTE only when the second 4-byte write arrives.
   2282	 */
   2283	if (bytes < info->gtt_entry_size) {
   2284		bool found = false;
   2285
   2286		list_for_each_entry_safe(pos, n,
   2287				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
   2288			if (g_gtt_index == pos->offset >>
   2289					info->gtt_entry_size_shift) {
   2290				if (off != pos->offset) {
   2291					/* the second partial part */
   2292					int last_off = pos->offset &
   2293						(info->gtt_entry_size - 1);
   2294
   2295					memcpy((void *)&e.val64 + last_off,
   2296						(void *)&pos->data + last_off,
   2297						bytes);
   2298
   2299					list_del(&pos->list);
   2300					kfree(pos);
   2301					found = true;
   2302					break;
   2303				}
   2304
   2305				/* update of the first partial part */
   2306				pos->data = e.val64;
   2307				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
   2308				return 0;
   2309			}
   2310		}
   2311
   2312		if (!found) {
   2313			/* the first partial part */
   2314			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
   2315			if (!partial_pte)
   2316				return -ENOMEM;
   2317			partial_pte->offset = off;
   2318			partial_pte->data = e.val64;
   2319			list_add_tail(&partial_pte->list,
   2320				&ggtt_mm->ggtt_mm.partial_pte_list);
   2321			partial_update = true;
   2322		}
   2323	}
   2324
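       	/*
       	 * Only when a complete, present entry is available do we map the
       	 * guest page and build a host (shadow) entry from it; otherwise the
       	 * host entry is pointed at the scratch page and marked not present.
       	 */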
   2325	if (!partial_update && (ops->test_present(&e))) {
   2326		gfn = ops->get_pfn(&e);
   2327		m.val64 = e.val64;
   2328		m.type = e.type;
   2329
   2330		/* one PTE update may be issued in multiple writes and the
   2331		 * first write may not construct a valid gfn
   2332		 */
   2333		if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
   2334			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
   2335			goto out;
   2336		}
   2337
   2338		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
   2339						   &dma_addr);
   2340		if (ret) {
   2341			gvt_vgpu_err("fail to populate guest ggtt entry\n");
   2342			/* The guest driver may read/write the entry while it is only
   2343			 * partially updated; in this situation the p2m mapping can
   2344			 * fail, so set the shadow entry to point to a scratch page.
   2345			 */
   2346			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
   2347		} else
   2348			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
   2349	} else {
   2350		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
   2351		ops->clear_present(&m);
   2352	}
   2353
   2354out:
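       	/*
       	 * Commit the update: store the new guest-visible entry, release the
       	 * DMA mapping held by the old host entry, install the new host entry
       	 * and flush the GGTT.
       	 */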
   2355	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
   2356
   2357	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
   2358	ggtt_invalidate_pte(vgpu, &e);
   2359
   2360	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
   2361	ggtt_invalidate(gvt->gt);
   2362	return 0;
   2363}
   2364
   2365/**
   2366 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
   2367 * @vgpu: a vGPU
   2368 * @off: register offset
   2369 * @p_data: data from guest write
   2370 * @bytes: data length
   2371 *
   2372 * This function is used to emulate the GTT MMIO register write
   2373 *
   2374 * Returns:
   2375 * Zero on success, error code if failed.
   2376 */
   2377int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
   2378		unsigned int off, void *p_data, unsigned int bytes)
   2379{
   2380	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
   2381	int ret;
   2382	struct intel_vgpu_submission *s = &vgpu->submission;
   2383	struct intel_engine_cs *engine;
   2384	int i;
   2385
   2386	if (bytes != 4 && bytes != 8)
   2387		return -EINVAL;
   2388
   2389	off -= info->gtt_start_offset;
   2390	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
   2391
   2392	/* If the ggtt entry of the last submitted context is written,
   2393	 * that context probably got unpinned.
   2394	 * Mark the last shadowed ctx as invalid.
   2395	 */
   2396	for_each_engine(engine, vgpu->gvt->gt, i) {
   2397		if (!s->last_ctx[i].valid)
   2398			continue;
   2399
   2400		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
   2401			s->last_ctx[i].valid = false;
   2402	}
   2403	return ret;
   2404}
   2405
   2406static int alloc_scratch_pages(struct intel_vgpu *vgpu,
   2407		enum intel_gvt_gtt_type type)
   2408{
   2409	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
   2410	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
   2411	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
   2412	int page_entry_num = I915_GTT_PAGE_SIZE >>
   2413				vgpu->gvt->device_info.gtt_entry_size_shift;
   2414	void *scratch_pt;
   2415	int i;
   2416	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
   2417	dma_addr_t daddr;
   2418
   2419	if (drm_WARN_ON(&i915->drm,
   2420			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
   2421		return -EINVAL;
   2422
   2423	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
   2424	if (!scratch_pt) {
   2425		gvt_vgpu_err("fail to allocate scratch page\n");
   2426		return -ENOMEM;
   2427	}
   2428
   2429	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
   2430	if (dma_mapping_error(dev, daddr)) {
   2431		gvt_vgpu_err("fail to dmamap scratch_pt\n");
   2432		__free_page(virt_to_page(scratch_pt));
   2433		return -ENOMEM;
   2434	}
   2435	gtt->scratch_pt[type].page_mfn =
   2436		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
   2437	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
   2438	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
   2439			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
   2440
   2441	/* Build the tree by completely filling the scratch pt with entries
   2442	 * that point to the next-level scratch pt or scratch page. The
   2443	 * scratch_pt[type] indicates the scratch pt/scratch page used by a
   2444	 * page table of the given 'type'.
   2445	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
   2446	 * GTT_TYPE_PPGTT_PDE_PT level pt; that scratch_pt is itself of type
   2447	 * GTT_TYPE_PPGTT_PTE_PT and is filled entirely with the scratch page mfn.
   2448	 */
   2449	if (type > GTT_TYPE_PPGTT_PTE_PT) {
   2450		struct intel_gvt_gtt_entry se;
   2451
   2452		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
   2453		se.type = get_entry_type(type - 1);
   2454		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
   2455
   2456		/* The entry parameters like present/writeable/cache type are
   2457		 * set to match i915's scratch page tree.
   2458		 */
   2459		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
   2460		if (type == GTT_TYPE_PPGTT_PDE_PT)
   2461			se.val64 |= PPAT_CACHED;
   2462
   2463		for (i = 0; i < page_entry_num; i++)
   2464			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
   2465	}
   2466
   2467	return 0;
   2468}
   2469
   2470static int release_scratch_page_tree(struct intel_vgpu *vgpu)
   2471{
   2472	int i;
   2473	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
   2474	dma_addr_t daddr;
   2475
   2476	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
   2477		if (vgpu->gtt.scratch_pt[i].page != NULL) {
   2478			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
   2479					I915_GTT_PAGE_SHIFT);
   2480			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
   2481			__free_page(vgpu->gtt.scratch_pt[i].page);
   2482			vgpu->gtt.scratch_pt[i].page = NULL;
   2483			vgpu->gtt.scratch_pt[i].page_mfn = 0;
   2484		}
   2485	}
   2486
   2487	return 0;
   2488}
   2489
   2490static int create_scratch_page_tree(struct intel_vgpu *vgpu)
   2491{
   2492	int i, ret;
   2493
   2494	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
   2495		ret = alloc_scratch_pages(vgpu, i);
   2496		if (ret)
   2497			goto err;
   2498	}
   2499
   2500	return 0;
   2501
   2502err:
   2503	release_scratch_page_tree(vgpu);
   2504	return ret;
   2505}
   2506
   2507/**
   2508 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
   2509 * @vgpu: a vGPU
   2510 *
   2511 * This function is used to initialize per-vGPU graphics memory virtualization
   2512 * components.
   2513 *
   2514 * Returns:
   2515 * Zero on success, error code if failed.
   2516 */
   2517int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
   2518{
   2519	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
   2520
   2521	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
   2522
   2523	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
   2524	INIT_LIST_HEAD(&gtt->oos_page_list_head);
   2525	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
   2526
   2527	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
   2528	if (IS_ERR(gtt->ggtt_mm)) {
   2529		gvt_vgpu_err("fail to create mm for ggtt.\n");
   2530		return PTR_ERR(gtt->ggtt_mm);
   2531	}
   2532
   2533	intel_vgpu_reset_ggtt(vgpu, false);
   2534
   2535	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
   2536
   2537	return create_scratch_page_tree(vgpu);
   2538}
   2539
   2540void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
   2541{
   2542	struct list_head *pos, *n;
   2543	struct intel_vgpu_mm *mm;
   2544
   2545	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
   2546		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
   2547		intel_vgpu_destroy_mm(mm);
   2548	}
   2549
   2550	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
   2551		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
   2552
   2553	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
   2554		gvt_err("Why do we still have spts not freed?\n");
   2555		ppgtt_free_all_spt(vgpu);
   2556	}
   2557}
   2558
   2559static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
   2560{
   2561	struct intel_gvt_partial_pte *pos, *next;
   2562
   2563	list_for_each_entry_safe(pos, next,
   2564				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
   2565				 list) {
   2566		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
   2567			pos->offset, pos->data);
   2568		kfree(pos);
   2569	}
   2570	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
   2571	vgpu->gtt.ggtt_mm = NULL;
   2572}
   2573
   2574/**
   2575 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
   2576 * @vgpu: a vGPU
   2577 *
   2578 * This function is used to clean up per-vGPU graphics memory virtualization
   2579 * components.
   2583 */
   2584void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
   2585{
   2586	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
   2587	intel_vgpu_destroy_ggtt_mm(vgpu);
   2588	release_scratch_page_tree(vgpu);
   2589}
   2590
   2591static void clean_spt_oos(struct intel_gvt *gvt)
   2592{
   2593	struct intel_gvt_gtt *gtt = &gvt->gtt;
   2594	struct list_head *pos, *n;
   2595	struct intel_vgpu_oos_page *oos_page;
   2596
   2597	WARN(!list_empty(&gtt->oos_page_use_list_head),
   2598		"someone is still using oos page\n");
   2599
   2600	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
   2601		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
   2602		list_del(&oos_page->list);
   2603		free_page((unsigned long)oos_page->mem);
   2604		kfree(oos_page);
   2605	}
   2606}
   2607
   2608static int setup_spt_oos(struct intel_gvt *gvt)
   2609{
   2610	struct intel_gvt_gtt *gtt = &gvt->gtt;
   2611	struct intel_vgpu_oos_page *oos_page;
   2612	int i;
   2613	int ret;
   2614
   2615	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
   2616	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
   2617
   2618	for (i = 0; i < preallocated_oos_pages; i++) {
   2619		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
   2620		if (!oos_page) {
   2621			ret = -ENOMEM;
   2622			goto fail;
   2623		}
   2624		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
   2625		if (!oos_page->mem) {
   2626			ret = -ENOMEM;
   2627			kfree(oos_page);
   2628			goto fail;
   2629		}
   2630
   2631		INIT_LIST_HEAD(&oos_page->list);
   2632		INIT_LIST_HEAD(&oos_page->vm_list);
   2633		oos_page->id = i;
   2634		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
   2635	}
   2636
   2637	gvt_dbg_mm("%d oos pages preallocated\n", i);
   2638
   2639	return 0;
   2640fail:
   2641	clean_spt_oos(gvt);
   2642	return ret;
   2643}
   2644
   2645/**
   2646 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
   2647 * @vgpu: a vGPU
   2648 * @pdps: pdp root array
   2649 *
   2650 * This function is used to find a PPGTT mm object from the mm object pool
   2651 *
   2652 * Returns:
   2653 * pointer to mm object on success, NULL if failed.
   2654 */
   2655struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
   2656		u64 pdps[])
   2657{
   2658	struct intel_vgpu_mm *mm;
   2659	struct list_head *pos;
   2660
   2661	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
   2662		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
   2663
   2664		switch (mm->ppgtt_mm.root_entry_type) {
   2665		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
   2666			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
   2667				return mm;
   2668			break;
   2669		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
   2670			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
   2671				    sizeof(mm->ppgtt_mm.guest_pdps)))
   2672				return mm;
   2673			break;
   2674		default:
   2675			GEM_BUG_ON(1);
   2676		}
   2677	}
   2678	return NULL;
   2679}
   2680
   2681/**
   2682 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
   2683 * @vgpu: a vGPU
   2684 * @root_entry_type: ppgtt root entry type
   2685 * @pdps: guest pdps
   2686 *
   2687 * This function is used to find or create a PPGTT mm object from a guest.
   2688 *
   2689 * Returns:
   2690 * The mm object on success, an ERR_PTR-encoded error code if failed.
   2691 */
   2692struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
   2693		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
   2694{
   2695	struct intel_vgpu_mm *mm;
   2696
   2697	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
   2698	if (mm) {
   2699		intel_vgpu_mm_get(mm);
   2700	} else {
   2701		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
   2702		if (IS_ERR(mm))
   2703			gvt_vgpu_err("fail to create mm\n");
   2704	}
   2705	return mm;
   2706}
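       /*
        * Illustrative pairing sketch (not a call site in this file): users of
        * this find-or-create helper typically balance it with a put once done,
        *
        *	mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
        *	if (IS_ERR(mm))
        *		return PTR_ERR(mm);
        *	...
        *	intel_vgpu_put_ppgtt_mm(vgpu, pdps);
        */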
   2707
   2708/**
   2709 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
   2710 * @vgpu: a vGPU
   2711 * @pdps: guest pdps
   2712 *
   2713 * This function is used to find a guest's PPGTT mm object and put it.
   2714 *
   2715 * Returns:
   2716 * Zero on success, negative error code if failed.
   2717 */
   2718int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
   2719{
   2720	struct intel_vgpu_mm *mm;
   2721
   2722	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
   2723	if (!mm) {
   2724		gvt_vgpu_err("fail to find ppgtt instance.\n");
   2725		return -EINVAL;
   2726	}
   2727	intel_vgpu_mm_put(mm);
   2728	return 0;
   2729}
   2730
   2731/**
   2732 * intel_gvt_init_gtt - initialize mm components of a GVT device
   2733 * @gvt: GVT device
   2734 *
   2735 * This function is called at the initialization stage, to initialize
   2736 * the mm components of a GVT device.
   2737 *
   2738 * Returns:
   2739 * zero on success, negative error code if failed.
   2740 */
   2741int intel_gvt_init_gtt(struct intel_gvt *gvt)
   2742{
   2743	int ret;
   2744	void *page;
   2745	struct device *dev = gvt->gt->i915->drm.dev;
   2746	dma_addr_t daddr;
   2747
   2748	gvt_dbg_core("init gtt\n");
   2749
   2750	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
   2751	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
   2752
   2753	page = (void *)get_zeroed_page(GFP_KERNEL);
   2754	if (!page) {
   2755		gvt_err("fail to allocate scratch ggtt page\n");
   2756		return -ENOMEM;
   2757	}
   2758
   2759	daddr = dma_map_page(dev, virt_to_page(page), 0,
   2760			4096, DMA_BIDIRECTIONAL);
   2761	if (dma_mapping_error(dev, daddr)) {
   2762		gvt_err("fail to dmamap scratch ggtt page\n");
   2763		__free_page(virt_to_page(page));
   2764		return -ENOMEM;
   2765	}
   2766
   2767	gvt->gtt.scratch_page = virt_to_page(page);
   2768	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
   2769
   2770	if (enable_out_of_sync) {
   2771		ret = setup_spt_oos(gvt);
   2772		if (ret) {
   2773			gvt_err("fail to initialize SPT oos\n");
   2774			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
   2775			__free_page(gvt->gtt.scratch_page);
   2776			return ret;
   2777		}
   2778	}
   2779	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
   2780	mutex_init(&gvt->gtt.ppgtt_mm_lock);
   2781	return 0;
   2782}
   2783
   2784/**
   2785 * intel_gvt_clean_gtt - clean up mm components of a GVT device
   2786 * @gvt: GVT device
   2787 *
   2788 * This function is called at the driver unloading stage, to clean up the
   2789 * mm components of a GVT device.
   2790 *
   2791 */
   2792void intel_gvt_clean_gtt(struct intel_gvt *gvt)
   2793{
   2794	struct device *dev = gvt->gt->i915->drm.dev;
   2795	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
   2796					I915_GTT_PAGE_SHIFT);
   2797
   2798	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
   2799
   2800	__free_page(gvt->gtt.scratch_page);
   2801
   2802	if (enable_out_of_sync)
   2803		clean_spt_oos(gvt);
   2804}
   2805
   2806/**
   2807 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
   2808 * @vgpu: a vGPU
   2809 *
   2810 * This function is called to invalidate all PPGTT instances of a vGPU.
   2811 *
   2812 */
   2813void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
   2814{
   2815	struct list_head *pos, *n;
   2816	struct intel_vgpu_mm *mm;
   2817
   2818	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
   2819		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
   2820		if (mm->type == INTEL_GVT_MM_PPGTT) {
   2821			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
   2822			list_del_init(&mm->ppgtt_mm.lru_list);
   2823			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
   2824			if (mm->ppgtt_mm.shadowed)
   2825				invalidate_ppgtt_mm(mm);
   2826		}
   2827	}
   2828}
   2829
   2830/**
   2831 * intel_vgpu_reset_ggtt - reset the GGTT entry
   2832 * @vgpu: a vGPU
   2833 * @invalidate_old: invalidate old entries
   2834 *
   2835 * This function is called at the vGPU create stage
   2836 * to reset all the GGTT entries.
   2837 *
   2838 */
   2839void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
   2840{
   2841	struct intel_gvt *gvt = vgpu->gvt;
   2842	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
   2843	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
   2844	struct intel_gvt_gtt_entry old_entry;
   2845	u32 index;
   2846	u32 num_entries;
   2847
   2848	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
   2849	pte_ops->set_present(&entry);
   2850
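       	/*
       	 * Point every GGTT entry in this vGPU's aperture and hidden ranges at
       	 * the scratch page, optionally tearing down the old DMA mappings.
       	 */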
   2851	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
   2852	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
   2853	while (num_entries--) {
   2854		if (invalidate_old) {
   2855			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
   2856			ggtt_invalidate_pte(vgpu, &old_entry);
   2857		}
   2858		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
   2859	}
   2860
   2861	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
   2862	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
   2863	while (num_entries--) {
   2864		if (invalidate_old) {
   2865			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
   2866			ggtt_invalidate_pte(vgpu, &old_entry);
   2867		}
   2868		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
   2869	}
   2870
   2871	ggtt_invalidate(gvt->gt);
   2872}
   2873
   2874/**
   2875 * intel_vgpu_reset_gtt - reset the all GTT related status
   2876 * @vgpu: a vGPU
   2877 *
   2878 * This function is called from vfio core to reset all
   2879 * GTT related status, including GGTT, PPGTT, scratch page.
   2880 *
   2881 */
   2882void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
   2883{
   2884	/* Shadow pages are only created when there is no page
   2885	 * table tracking data, so remove page tracking data after
   2886	 * removing the shadow pages.
   2887	 */
   2888	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
   2889	intel_vgpu_reset_ggtt(vgpu, true);
   2890}
   2891
   2892/**
   2893 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
   2894 * @gvt: intel gvt device
   2895 *
   2896 * This function is called at driver resume stage to restore
   2897 * GGTT entries of every vGPU.
   2898 *
   2899 */
   2900void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
   2901{
   2902	struct intel_vgpu *vgpu;
   2903	struct intel_vgpu_mm *mm;
   2904	int id;
   2905	gen8_pte_t pte;
   2906	u32 idx, num_low, num_hi, offset;
   2907
   2908	/* Restore dirty host ggtt for all vGPUs */
   2909	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
   2910		mm = vgpu->gtt.ggtt_mm;
   2911
   2912		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
   2913		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
   2914		for (idx = 0; idx < num_low; idx++) {
   2915			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
   2916			if (pte & GEN8_PAGE_PRESENT)
   2917				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
   2918		}
   2919
   2920		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
   2921		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
   2922		for (idx = 0; idx < num_hi; idx++) {
   2923			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
   2924			if (pte & GEN8_PAGE_PRESENT)
   2925				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
   2926		}
   2927	}
   2928}