cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_vm_pt.c (26150B)


      1// SPDX-License-Identifier: GPL-2.0 OR MIT
      2/*
      3 * Copyright 2022 Advanced Micro Devices, Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 */
     23
     24#include <drm/drm_drv.h>
     25
     26#include "amdgpu.h"
     27#include "amdgpu_trace.h"
     28#include "amdgpu_vm.h"
     29
     30/*
     31 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
     32 */
     33struct amdgpu_vm_pt_cursor {
     34	uint64_t pfn;
     35	struct amdgpu_vm_bo_base *parent;
     36	struct amdgpu_vm_bo_base *entry;
     37	unsigned int level;
     38};
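/*
 * How the walkers below use the cursor: @pfn is the address (in GPU pages)
 * the walk currently points at, @parent and @entry are the bo_base of the
 * parent PD and of the current PD/PT, and @level is the current VMPT level
 * (root_level .. AMDGPU_VM_PTB).
 */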
     39
     40/**
     41 * amdgpu_vm_pt_level_shift - return the addr shift for each level
     42 *
     43 * @adev: amdgpu_device pointer
     44 * @level: VMPT level
     45 *
     46 * Returns:
     47 * The number of bits the pfn needs to be right shifted for a level.
     48 */
     49static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
     50					     unsigned int level)
     51{
     52	switch (level) {
     53	case AMDGPU_VM_PDB2:
     54	case AMDGPU_VM_PDB1:
     55	case AMDGPU_VM_PDB0:
     56		return 9 * (AMDGPU_VM_PDB0 - level) +
     57			adev->vm_manager.block_size;
     58	case AMDGPU_VM_PTB:
     59		return 0;
     60	default:
     61		return ~0;
     62	}
     63}
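/*
 * For illustration, with the common 9-bit block size this evaluates to
 * PTB -> 0, PDB0 -> 9, PDB1 -> 18 and PDB2 -> 27; the PDB shifts scale
 * with adev->vm_manager.block_size.
 */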
     64
     65/**
     66 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
     67 *
     68 * @adev: amdgpu_device pointer
     69 * @level: VMPT level
     70 *
     71 * Returns:
     72 * The number of entries in a page directory or page table.
     73 */
     74static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
     75					     unsigned int level)
     76{
     77	unsigned int shift;
     78
     79	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
     80	if (level == adev->vm_manager.root_level)
     81		/* For the root directory */
     82		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
     83			>> shift;
     84	else if (level != AMDGPU_VM_PTB)
     85		/* Everything in between */
     86		return 512;
     87
     88	/* For the page tables on the leaves */
     89	return AMDGPU_VM_PTE_COUNT(adev);
     90}
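/*
 * In other words: interior directories always hold 512 entries, the root is
 * sized so that it covers max_pfn, and the leaf PTs hold
 * AMDGPU_VM_PTE_COUNT(adev) entries (512 with the common 9-bit block size).
 */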
     91
     92/**
     93 * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
     94 *
     95 * @adev: amdgpu_device pointer
     96 *
     97 * Returns:
      98 * The number of entries in the root page directory which need the ATS setting.
     99 */
    100static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
    101{
    102	unsigned int shift;
    103
    104	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
    105	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
    106}
    107
    108/**
    109 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
    110 *
    111 * @adev: amdgpu_device pointer
    112 * @level: VMPT level
    113 *
    114 * Returns:
    115 * The mask to extract the entry number of a PD/PT from an address.
    116 */
    117static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
    118					  unsigned int level)
    119{
    120	if (level <= adev->vm_manager.root_level)
    121		return 0xffffffff;
    122	else if (level != AMDGPU_VM_PTB)
    123		return 0x1ff;
    124	else
    125		return AMDGPU_VM_PTE_COUNT(adev) - 1;
    126}
    127
    128/**
    129 * amdgpu_vm_pt_size - returns the size of the page table in bytes
    130 *
    131 * @adev: amdgpu_device pointer
    132 * @level: VMPT level
    133 *
    134 * Returns:
    135 * The size of the BO for a page directory or page table in bytes.
    136 */
    137static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
    138				      unsigned int level)
    139{
    140	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
    141}
    142
    143/**
    144 * amdgpu_vm_pt_parent - get the parent page directory
    145 *
    146 * @pt: child page table
    147 *
    148 * Helper to get the parent entry for the child page table. NULL if we are at
    149 * the root page directory.
    150 */
    151static struct amdgpu_vm_bo_base *
    152amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
    153{
    154	struct amdgpu_bo *parent = pt->bo->parent;
    155
    156	if (!parent)
    157		return NULL;
    158
    159	return parent->vm_bo;
    160}
    161
    162/**
    163 * amdgpu_vm_pt_start - start PD/PT walk
    164 *
    165 * @adev: amdgpu_device pointer
    166 * @vm: amdgpu_vm structure
    167 * @start: start address of the walk
    168 * @cursor: state to initialize
    169 *
     170 * Initialize an amdgpu_vm_pt_cursor to start a walk.
    171 */
    172static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
    173			       struct amdgpu_vm *vm, uint64_t start,
    174			       struct amdgpu_vm_pt_cursor *cursor)
    175{
    176	cursor->pfn = start;
    177	cursor->parent = NULL;
    178	cursor->entry = &vm->root;
    179	cursor->level = adev->vm_manager.root_level;
    180}
    181
    182/**
    183 * amdgpu_vm_pt_descendant - go to child node
    184 *
    185 * @adev: amdgpu_device pointer
    186 * @cursor: current state
    187 *
    188 * Walk to the child node of the current node.
    189 * Returns:
    190 * True if the walk was possible, false otherwise.
    191 */
    192static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
    193				    struct amdgpu_vm_pt_cursor *cursor)
    194{
    195	unsigned int mask, shift, idx;
    196
    197	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
    198	    !cursor->entry->bo)
    199		return false;
    200
    201	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
    202	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
    203
    204	++cursor->level;
    205	idx = (cursor->pfn >> shift) & mask;
    206	cursor->parent = cursor->entry;
    207	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
    208	return true;
    209}
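/*
 * Example, assuming a 9-bit block size and a root at PDB2: descending from a
 * PDB1 node uses shift = 18 and mask = 0x1ff, so the child index is taken
 * from bits [26:18] of the pfn.
 */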
    210
    211/**
    212 * amdgpu_vm_pt_sibling - go to sibling node
    213 *
    214 * @adev: amdgpu_device pointer
    215 * @cursor: current state
    216 *
    217 * Walk to the sibling node of the current node.
    218 * Returns:
    219 * True if the walk was possible, false otherwise.
    220 */
    221static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
    222				 struct amdgpu_vm_pt_cursor *cursor)
    223{
    224
    225	unsigned int shift, num_entries;
    226	struct amdgpu_bo_vm *parent;
    227
    228	/* Root doesn't have a sibling */
    229	if (!cursor->parent)
    230		return false;
    231
    232	/* Go to our parents and see if we got a sibling */
    233	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
    234	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
    235	parent = to_amdgpu_bo_vm(cursor->parent->bo);
    236
    237	if (cursor->entry == &parent->entries[num_entries - 1])
    238		return false;
    239
    240	cursor->pfn += 1ULL << shift;
    241	cursor->pfn &= ~((1ULL << shift) - 1);
    242	++cursor->entry;
    243	return true;
    244}
    245
    246/**
    247 * amdgpu_vm_pt_ancestor - go to parent node
    248 *
    249 * @cursor: current state
    250 *
    251 * Walk to the parent node of the current node.
    252 * Returns:
    253 * True if the walk was possible, false otherwise.
    254 */
    255static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
    256{
    257	if (!cursor->parent)
    258		return false;
    259
    260	--cursor->level;
    261	cursor->entry = cursor->parent;
    262	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
    263	return true;
    264}
    265
    266/**
     267 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
    268 *
    269 * @adev: amdgpu_device pointer
    270 * @cursor: current state
    271 *
    272 * Walk the PD/PT tree to the next node.
    273 */
    274static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
    275			      struct amdgpu_vm_pt_cursor *cursor)
    276{
    277	/* First try a newborn child */
    278	if (amdgpu_vm_pt_descendant(adev, cursor))
    279		return;
    280
     281	/* If that didn't work, try to find a sibling */
    282	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
    283		/* No sibling, go to our parents and grandparents */
    284		if (!amdgpu_vm_pt_ancestor(cursor)) {
    285			cursor->pfn = ~0ll;
    286			return;
    287		}
    288	}
    289}
    290
    291/**
     292 * amdgpu_vm_pt_first_dfs - start a depth-first search
    293 *
    294 * @adev: amdgpu_device structure
    295 * @vm: amdgpu_vm structure
    296 * @start: optional cursor to start with
    297 * @cursor: state to initialize
    298 *
     299 * Starts a depth-first traversal of the PD/PT tree.
    300 */
    301static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
    302				   struct amdgpu_vm *vm,
    303				   struct amdgpu_vm_pt_cursor *start,
    304				   struct amdgpu_vm_pt_cursor *cursor)
    305{
    306	if (start)
    307		*cursor = *start;
    308	else
    309		amdgpu_vm_pt_start(adev, vm, 0, cursor);
    310
    311	while (amdgpu_vm_pt_descendant(adev, cursor))
    312		;
    313}
    314
    315/**
     316 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
    317 *
    318 * @start: starting point for the search
    319 * @entry: current entry
    320 *
    321 * Returns:
    322 * True when the search should continue, false otherwise.
    323 */
    324static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
    325				      struct amdgpu_vm_bo_base *entry)
    326{
    327	return entry && (!start || entry != start->entry);
    328}
    329
    330/**
     331 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
    332 *
    333 * @adev: amdgpu_device structure
    334 * @cursor: current state
    335 *
     336 * Move the cursor to the next node in a depth-first search.
    337 */
    338static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
    339				  struct amdgpu_vm_pt_cursor *cursor)
    340{
    341	if (!cursor->entry)
    342		return;
    343
    344	if (!cursor->parent)
    345		cursor->entry = NULL;
    346	else if (amdgpu_vm_pt_sibling(adev, cursor))
    347		while (amdgpu_vm_pt_descendant(adev, cursor))
    348			;
    349	else
    350		amdgpu_vm_pt_ancestor(cursor);
    351}
    352
    353/*
     354 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
    355 */
    356#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)		\
    357	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),		\
    358	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
    359	     amdgpu_vm_pt_continue_dfs((start), (entry));			\
    360	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
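/*
 * The entry is read before the cursor advances, so the node it refers to may
 * safely be freed from the loop body; amdgpu_vm_pt_free_dfs() below uses the
 * macro exactly this way:
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 *		amdgpu_vm_pt_free(entry);
 */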
    361
    362/**
    363 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
    364 *
    365 * @adev: amdgpu_device pointer
    366 * @vm: VM to clear BO from
    367 * @vmbo: BO to clear
    368 * @immediate: use an immediate update
    369 *
    370 * Root PD needs to be reserved when calling this.
    371 *
    372 * Returns:
    373 * 0 on success, errno otherwise.
    374 */
    375int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
    376		       struct amdgpu_bo_vm *vmbo, bool immediate)
    377{
    378	unsigned int level = adev->vm_manager.root_level;
    379	struct ttm_operation_ctx ctx = { true, false };
    380	struct amdgpu_vm_update_params params;
    381	struct amdgpu_bo *ancestor = &vmbo->bo;
    382	unsigned int entries, ats_entries;
    383	struct amdgpu_bo *bo = &vmbo->bo;
    384	uint64_t addr;
    385	int r, idx;
    386
    387	/* Figure out our place in the hierarchy */
    388	if (ancestor->parent) {
    389		++level;
    390		while (ancestor->parent->parent) {
    391			++level;
    392			ancestor = ancestor->parent;
    393		}
    394	}
    395
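	/*
	 * Each PDE/PTE is 8 bytes.  When ATS is supported, the entries that
	 * cover the range below the GMC hole get the default ATC mapping
	 * (AMDGPU_PTE_DEFAULT_ATC below); everything else is simply cleared.
	 */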
    396	entries = amdgpu_bo_size(bo) / 8;
    397	if (!vm->pte_support_ats) {
    398		ats_entries = 0;
    399
    400	} else if (!bo->parent) {
    401		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
    402		ats_entries = min(ats_entries, entries);
    403		entries -= ats_entries;
    404
    405	} else {
    406		struct amdgpu_vm_bo_base *pt;
    407
    408		pt = ancestor->vm_bo;
    409		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
    410		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
    411		    ats_entries) {
    412			ats_entries = 0;
    413		} else {
    414			ats_entries = entries;
    415			entries = 0;
    416		}
    417	}
    418
    419	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    420	if (r)
    421		return r;
    422
    423	if (vmbo->shadow) {
    424		struct amdgpu_bo *shadow = vmbo->shadow;
    425
    426		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
    427		if (r)
    428			return r;
    429	}
    430
    431	if (!drm_dev_enter(adev_to_drm(adev), &idx))
    432		return -ENODEV;
    433
    434	r = vm->update_funcs->map_table(vmbo);
    435	if (r)
    436		goto exit;
    437
    438	memset(&params, 0, sizeof(params));
    439	params.adev = adev;
    440	params.vm = vm;
    441	params.immediate = immediate;
    442
    443	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
    444	if (r)
    445		goto exit;
    446
    447	addr = 0;
    448	if (ats_entries) {
    449		uint64_t value = 0, flags;
    450
    451		flags = AMDGPU_PTE_DEFAULT_ATC;
    452		if (level != AMDGPU_VM_PTB) {
    453			/* Handle leaf PDEs as PTEs */
    454			flags |= AMDGPU_PDE_PTE;
    455			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
    456		}
    457
    458		r = vm->update_funcs->update(&params, vmbo, addr, 0,
    459					     ats_entries, value, flags);
    460		if (r)
    461			goto exit;
    462
    463		addr += ats_entries * 8;
    464	}
    465
    466	if (entries) {
    467		uint64_t value = 0, flags = 0;
    468
    469		if (adev->asic_type >= CHIP_VEGA10) {
    470			if (level != AMDGPU_VM_PTB) {
    471				/* Handle leaf PDEs as PTEs */
    472				flags |= AMDGPU_PDE_PTE;
    473				amdgpu_gmc_get_vm_pde(adev, level,
    474						      &value, &flags);
    475			} else {
    476				/* Workaround for fault priority problem on GMC9 */
    477				flags = AMDGPU_PTE_EXECUTABLE;
    478			}
    479		}
    480
    481		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
    482					     value, flags);
    483		if (r)
    484			goto exit;
    485	}
    486
    487	r = vm->update_funcs->commit(&params, NULL);
    488exit:
    489	drm_dev_exit(idx);
    490	return r;
    491}
    492
    493/**
    494 * amdgpu_vm_pt_create - create bo for PD/PT
    495 *
    496 * @adev: amdgpu_device pointer
    497 * @vm: requesting vm
    498 * @level: the page table level
     499 * @immediate: use an immediate update
    500 * @vmbo: pointer to the buffer object pointer
    501 */
    502int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
    503			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
    504{
    505	struct amdgpu_bo_param bp;
    506	struct amdgpu_bo *bo;
    507	struct dma_resv *resv;
    508	unsigned int num_entries;
    509	int r;
    510
    511	memset(&bp, 0, sizeof(bp));
    512
    513	bp.size = amdgpu_vm_pt_size(adev, level);
    514	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
    515	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
    516	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
    517	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
    518		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
    519
    520	if (level < AMDGPU_VM_PTB)
    521		num_entries = amdgpu_vm_pt_num_entries(adev, level);
    522	else
    523		num_entries = 0;
    524
    525	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
    526
    527	if (vm->use_cpu_for_update)
    528		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    529
    530	bp.type = ttm_bo_type_kernel;
    531	bp.no_wait_gpu = immediate;
    532	if (vm->root.bo)
    533		bp.resv = vm->root.bo->tbo.base.resv;
    534
    535	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
    536	if (r)
    537		return r;
    538
    539	bo = &(*vmbo)->bo;
    540	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
    541		(*vmbo)->shadow = NULL;
    542		return 0;
    543	}
    544
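	/*
	 * Otherwise create a GTT shadow copy of the PD/PT, so its contents can
	 * be restored if the VRAM copy is lost (e.g. across a GPU reset).
	 */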
    545	if (!bp.resv)
    546		WARN_ON(dma_resv_lock(bo->tbo.base.resv,
    547				      NULL));
    548	resv = bp.resv;
    549	memset(&bp, 0, sizeof(bp));
    550	bp.size = amdgpu_vm_pt_size(adev, level);
    551	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
    552	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
    553	bp.type = ttm_bo_type_kernel;
    554	bp.resv = bo->tbo.base.resv;
    555	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
    556
    557	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
    558
    559	if (!resv)
    560		dma_resv_unlock(bo->tbo.base.resv);
    561
    562	if (r) {
    563		amdgpu_bo_unref(&bo);
    564		return r;
    565	}
    566
    567	(*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
    568	amdgpu_bo_add_to_shadow_list(*vmbo);
    569
    570	return 0;
    571}
    572
    573/**
    574 * amdgpu_vm_pt_alloc - Allocate a specific page table
    575 *
    576 * @adev: amdgpu_device pointer
    577 * @vm: VM to allocate page tables for
    578 * @cursor: Which page table to allocate
    579 * @immediate: use an immediate update
    580 *
    581 * Make sure a specific page table or directory is allocated.
    582 *
    583 * Returns:
     584 * 0 if the page table was allocated or already present, negative errno
     585 * if an error occurred.
    586 */
    587static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
    588			      struct amdgpu_vm *vm,
    589			      struct amdgpu_vm_pt_cursor *cursor,
    590			      bool immediate)
    591{
    592	struct amdgpu_vm_bo_base *entry = cursor->entry;
    593	struct amdgpu_bo *pt_bo;
    594	struct amdgpu_bo_vm *pt;
    595	int r;
    596
    597	if (entry->bo)
    598		return 0;
    599
    600	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
    601	if (r)
    602		return r;
    603
    604	/* Keep a reference to the root directory to avoid
    605	 * freeing them up in the wrong order.
    606	 */
    607	pt_bo = &pt->bo;
    608	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
    609	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
    610	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
    611	if (r)
    612		goto error_free_pt;
    613
    614	return 0;
    615
    616error_free_pt:
    617	amdgpu_bo_unref(&pt->shadow);
    618	amdgpu_bo_unref(&pt_bo);
    619	return r;
    620}
    621
    622/**
    623 * amdgpu_vm_pt_free - free one PD/PT
    624 *
    625 * @entry: PDE to free
    626 */
    627static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
    628{
    629	struct amdgpu_bo *shadow;
    630
    631	if (!entry->bo)
    632		return;
    633	shadow = amdgpu_bo_shadowed(entry->bo);
    634	if (shadow) {
    635		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
    636		amdgpu_bo_unref(&shadow);
    637	}
    638	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
    639	entry->bo->vm_bo = NULL;
    640	list_del(&entry->vm_status);
    641	amdgpu_bo_unref(&entry->bo);
    642}
    643
    644/**
    645 * amdgpu_vm_pt_free_dfs - free PD/PT levels
    646 *
    647 * @adev: amdgpu device structure
    648 * @vm: amdgpu vm structure
    649 * @start: optional cursor where to start freeing PDs/PTs
    650 *
    651 * Free the page directory or page table level and all sub levels.
    652 */
    653static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
    654				  struct amdgpu_vm *vm,
    655				  struct amdgpu_vm_pt_cursor *start)
    656{
    657	struct amdgpu_vm_pt_cursor cursor;
    658	struct amdgpu_vm_bo_base *entry;
    659
    660	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
    661		amdgpu_vm_pt_free(entry);
    662
    663	if (start)
    664		amdgpu_vm_pt_free(start->entry);
    665}
    666
    667/**
    668 * amdgpu_vm_pt_free_root - free root PD
    669 * @adev: amdgpu device structure
    670 * @vm: amdgpu vm structure
    671 *
    672 * Free the root page directory and everything below it.
    673 */
    674void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
    675{
    676	amdgpu_vm_pt_free_dfs(adev, vm, NULL);
    677}
    678
    679/**
    680 * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
    681 *
    682 * @adev: amdgpu_device pointer
    683 * @vm: the VM to check
    684 *
     685 * Check all entries of the root PD. If any lower-level PDs are
     686 * allocated, page tables have been created and filled, so the VM
     687 * is not clean.
    688 *
    689 * Returns:
     690 *	True if this VM is clean, false otherwise
    691 */
    692bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
    693				struct amdgpu_vm *vm)
    694{
    695	enum amdgpu_vm_level root = adev->vm_manager.root_level;
    696	unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
    697	unsigned int i = 0;
    698
    699	for (i = 0; i < entries; i++) {
    700		if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
    701			return false;
    702	}
    703	return true;
    704}
    705
    706/**
    707 * amdgpu_vm_pde_update - update a single level in the hierarchy
    708 *
    709 * @params: parameters for the update
    710 * @entry: entry to update
    711 *
    712 * Makes sure the requested entry in parent is up to date.
    713 */
    714int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
    715			 struct amdgpu_vm_bo_base *entry)
    716{
    717	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
    718	struct amdgpu_bo *bo = parent->bo, *pbo;
    719	struct amdgpu_vm *vm = params->vm;
    720	uint64_t pde, pt, flags;
    721	unsigned int level;
    722
    723	for (level = 0, pbo = bo->parent; pbo; ++level)
    724		pbo = pbo->parent;
    725
    726	level += params->adev->vm_manager.root_level;
    727	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
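	/* Each PDE is 8 bytes, so the byte offset into the parent is index * 8 */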
    728	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
    729	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
    730					1, 0, flags);
    731}
    732
    733/*
    734 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
    735 *
    736 * Make sure to set the right flags for the PTEs at the desired level.
    737 */
    738static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
    739				       struct amdgpu_bo_vm *pt,
    740				       unsigned int level,
    741				       uint64_t pe, uint64_t addr,
    742				       unsigned int count, uint32_t incr,
    743				       uint64_t flags)
    744
    745{
    746	if (level != AMDGPU_VM_PTB) {
    747		flags |= AMDGPU_PDE_PTE;
    748		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
    749
    750	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
    751		   !(flags & AMDGPU_PTE_VALID) &&
    752		   !(flags & AMDGPU_PTE_PRT)) {
    753
    754		/* Workaround for fault priority problem on GMC9 */
    755		flags |= AMDGPU_PTE_EXECUTABLE;
    756	}
    757
    758	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
    759					 flags);
    760}
    761
    762/**
    763 * amdgpu_vm_pte_fragment - get fragment for PTEs
    764 *
    765 * @params: see amdgpu_vm_update_params definition
    766 * @start: first PTE to handle
    767 * @end: last PTE to handle
    768 * @flags: hw mapping flags
    769 * @frag: resulting fragment size
    770 * @frag_end: end of this fragment
    771 *
    772 * Returns the first possible fragment for the start and end address.
    773 */
    774static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
    775				   uint64_t start, uint64_t end, uint64_t flags,
    776				   unsigned int *frag, uint64_t *frag_end)
    777{
    778	/**
    779	 * The MC L1 TLB supports variable sized pages, based on a fragment
    780	 * field in the PTE. When this field is set to a non-zero value, page
    781	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
    782	 * flags are considered valid for all PTEs within the fragment range
    783	 * and corresponding mappings are assumed to be physically contiguous.
    784	 *
    785	 * The L1 TLB can store a single PTE for the whole fragment,
    786	 * significantly increasing the space available for translation
    787	 * caching. This leads to large improvements in throughput when the
    788	 * TLB is under pressure.
    789	 *
    790	 * The L2 TLB distributes small and large fragments into two
    791	 * asymmetric partitions. The large fragment cache is significantly
    792	 * larger. Thus, we try to use large fragments wherever possible.
    793	 * Userspace can support this by aligning virtual base address and
    794	 * allocation size to the fragment size.
    795	 *
    796	 * Starting with Vega10 the fragment size only controls the L1. The L2
     797	 * is now directly fed with small/huge/giant pages from the walker.
    798	 */
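	/*
	 * Worked example with illustrative values: for start = 0x400 and
	 * end = 0x800 (GPU pfns), ffs(start) - 1 == fls64(end - start) - 1 == 10,
	 * so frag = 10 and frag_end = start + (1 << 10) = end, i.e. the whole
	 * range fits in one 1 << (12 + 10) = 4MiB fragment (assuming frag does
	 * not exceed max_frag, which is 31 on Vega10 and later).
	 */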
    799	unsigned int max_frag;
    800
    801	if (params->adev->asic_type < CHIP_VEGA10)
    802		max_frag = params->adev->vm_manager.fragment_size;
    803	else
    804		max_frag = 31;
    805
     806	/* system pages are not physically contiguous */
    807	if (params->pages_addr) {
    808		*frag = 0;
    809		*frag_end = end;
    810		return;
    811	}
    812
    813	/* This intentionally wraps around if no bit is set */
    814	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
    815	if (*frag >= max_frag) {
    816		*frag = max_frag;
    817		*frag_end = end & ~((1ULL << max_frag) - 1);
    818	} else {
    819		*frag_end = start + (1 << *frag);
    820	}
    821}
    822
    823/**
    824 * amdgpu_vm_ptes_update - make sure that page tables are valid
    825 *
    826 * @params: see amdgpu_vm_update_params definition
    827 * @start: start of GPU address range
    828 * @end: end of GPU address range
    829 * @dst: destination address to map to, the next dst inside the function
    830 * @flags: mapping flags
    831 *
    832 * Update the page tables in the range @start - @end.
    833 *
    834 * Returns:
    835 * 0 for success, -EINVAL for failure.
    836 */
    837int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
    838			  uint64_t start, uint64_t end,
    839			  uint64_t dst, uint64_t flags)
    840{
    841	struct amdgpu_device *adev = params->adev;
    842	struct amdgpu_vm_pt_cursor cursor;
    843	uint64_t frag_start = start, frag_end;
    844	unsigned int frag;
    845	int r;
    846
    847	/* figure out the initial fragment */
    848	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
    849			       &frag_end);
    850
    851	/* walk over the address space and update the PTs */
    852	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
    853	while (cursor.pfn < end) {
    854		unsigned int shift, parent_shift, mask;
    855		uint64_t incr, entry_end, pe_start;
    856		struct amdgpu_bo *pt;
    857
    858		if (!params->unlocked) {
    859			/* make sure that the page tables covering the
    860			 * address range are actually allocated
    861			 */
    862			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
    863					       &cursor, params->immediate);
    864			if (r)
    865				return r;
    866		}
    867
    868		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
    869		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
    870		if (params->unlocked) {
    871			/* Unlocked updates are only allowed on the leaves */
    872			if (amdgpu_vm_pt_descendant(adev, &cursor))
    873				continue;
    874		} else if (adev->asic_type < CHIP_VEGA10 &&
    875			   (flags & AMDGPU_PTE_VALID)) {
    876			/* No huge page support before GMC v9 */
    877			if (cursor.level != AMDGPU_VM_PTB) {
    878				if (!amdgpu_vm_pt_descendant(adev, &cursor))
    879					return -ENOENT;
    880				continue;
    881			}
    882		} else if (frag < shift) {
    883			/* We can't use this level when the fragment size is
    884			 * smaller than the address shift. Go to the next
    885			 * child entry and try again.
    886			 */
    887			if (amdgpu_vm_pt_descendant(adev, &cursor))
    888				continue;
    889		} else if (frag >= parent_shift) {
    890			/* If the fragment size is even larger than the parent
    891			 * shift we should go up one level and check it again.
    892			 */
    893			if (!amdgpu_vm_pt_ancestor(&cursor))
    894				return -EINVAL;
    895			continue;
    896		}
    897
    898		pt = cursor.entry->bo;
    899		if (!pt) {
    900			/* We need all PDs and PTs for mapping something, */
    901			if (flags & AMDGPU_PTE_VALID)
    902				return -ENOENT;
    903
    904			/* but unmapping something can happen at a higher
    905			 * level.
    906			 */
    907			if (!amdgpu_vm_pt_ancestor(&cursor))
    908				return -EINVAL;
    909
    910			pt = cursor.entry->bo;
    911			shift = parent_shift;
    912			frag_end = max(frag_end, ALIGN(frag_start + 1,
    913				   1ULL << shift));
    914		}
    915
    916		/* Looks good so far, calculate parameters for the update */
    917		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
    918		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
    919		pe_start = ((cursor.pfn >> shift) & mask) * 8;
    920		entry_end = ((uint64_t)mask + 1) << shift;
    921		entry_end += cursor.pfn & ~(entry_end - 1);
    922		entry_end = min(entry_end, end);
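		/*
		 * pe_start is the byte offset of the first entry to write in the
		 * current table (8 bytes per entry), incr is the address step per
		 * entry, and entry_end is where this table's coverage, clamped to
		 * @end, stops.
		 */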
    923
    924		do {
    925			struct amdgpu_vm *vm = params->vm;
    926			uint64_t upd_end = min(entry_end, frag_end);
    927			unsigned int nptes = (upd_end - frag_start) >> shift;
    928			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
    929
    930			/* This can happen when we set higher level PDs to
    931			 * silent to stop fault floods.
    932			 */
    933			nptes = max(nptes, 1u);
    934
    935			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
    936						    min(nptes, 32u), dst, incr,
    937						    upd_flags,
    938						    vm->task_info.pid,
    939						    vm->immediate.fence_context);
    940			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
    941						   cursor.level, pe_start, dst,
    942						   nptes, incr, upd_flags);
    943
    944			pe_start += nptes * 8;
    945			dst += nptes * incr;
    946
    947			frag_start = upd_end;
    948			if (frag_start >= frag_end) {
    949				/* figure out the next fragment */
    950				amdgpu_vm_pte_fragment(params, frag_start, end,
    951						       flags, &frag, &frag_end);
    952				if (frag < shift)
    953					break;
    954			}
    955		} while (frag_start < entry_end);
    956
    957		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
    958			/* Free all child entries.
    959			 * Update the tables with the flags and addresses and free up subsequent
    960			 * tables in the case of huge pages or freed up areas.
    961			 * This is the maximum you can free, because all other page tables are not
    962			 * completely covered by the range and so potentially still in use.
    963			 */
    964			while (cursor.pfn < frag_start) {
    965				/* Make sure previous mapping is freed */
    966				if (cursor.entry->bo) {
    967					params->table_freed = true;
    968					amdgpu_vm_pt_free_dfs(adev, params->vm,
    969							      &cursor);
    970				}
    971				amdgpu_vm_pt_next(adev, &cursor);
    972			}
    973
    974		} else if (frag >= shift) {
    975			/* or just move on to the next on the same level. */
    976			amdgpu_vm_pt_next(adev, &cursor);
    977		}
    978	}
    979
    980	return 0;
    981}