cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_vm.c (67446B)


      1/*
      2 * Copyright 2008 Advanced Micro Devices, Inc.
      3 * Copyright 2008 Red Hat Inc.
      4 * Copyright 2009 Jerome Glisse.
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a
      7 * copy of this software and associated documentation files (the "Software"),
      8 * to deal in the Software without restriction, including without limitation
      9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10 * and/or sell copies of the Software, and to permit persons to whom the
     11 * Software is furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in
     14 * all copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22 * OTHER DEALINGS IN THE SOFTWARE.
     23 *
     24 * Authors: Dave Airlie
     25 *          Alex Deucher
     26 *          Jerome Glisse
     27 */
     28
     29#include <linux/dma-fence-array.h>
     30#include <linux/interval_tree_generic.h>
     31#include <linux/idr.h>
     32#include <linux/dma-buf.h>
     33
     34#include <drm/amdgpu_drm.h>
     35#include <drm/drm_drv.h>
     36#include "amdgpu.h"
     37#include "amdgpu_trace.h"
     38#include "amdgpu_amdkfd.h"
     39#include "amdgpu_gmc.h"
     40#include "amdgpu_xgmi.h"
     41#include "amdgpu_dma_buf.h"
     42#include "amdgpu_res_cursor.h"
     43#include "kfd_svm.h"
     44
     45/**
     46 * DOC: GPUVM
     47 *
     48 * GPUVM is similar to the legacy gart on older asics; however,
     49 * rather than there being a single global gart table
     50 * for the entire GPU, there are multiple VM page tables active
     51 * at any given time.  The VM page tables can contain a mix of
     52 * vram pages and system memory pages.  System memory pages
     53 * can be mapped as snooped (cached system pages) or unsnooped
     54 * (uncached system pages).
     55 * Each VM has an ID associated with it and there is a page table
     56 * associated with each VMID.  When executing a command buffer,
     57 * the kernel tells the ring what VMID to use for that command
     58 * buffer.  VMIDs are allocated dynamically as commands are submitted.
     59 * The userspace drivers maintain their own address space and the kernel
     60 * sets up their page tables accordingly when they submit their
     61 * command buffers and a VMID is assigned.
     62 * Cayman/Trinity support up to 8 active VMs at any given time;
     63 * SI supports 16.
     64 */
     65
     66#define START(node) ((node)->start)
     67#define LAST(node) ((node)->last)
     68
     69INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
     70		     START, LAST, static, amdgpu_vm_it)
     71
     72#undef START
     73#undef LAST
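
       /*
        * For illustration: INTERVAL_TREE_DEFINE() above generates
        * amdgpu_vm_it_insert(), amdgpu_vm_it_remove(), amdgpu_vm_it_iter_first()
        * and amdgpu_vm_it_iter_next(), which the rest of this file uses to look
        * up mappings by GPU address range, e.g.:
        *
        *	mapping = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
        */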
     74
     75/**
     76 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
     77 */
     78struct amdgpu_prt_cb {
     79
     80	/**
     81	 * @adev: amdgpu device
     82	 */
     83	struct amdgpu_device *adev;
     84
     85	/**
     86	 * @cb: callback
     87	 */
     88	struct dma_fence_cb cb;
     89};
     90
     91/**
     92 * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence
     93 */
     94struct amdgpu_vm_tlb_seq_cb {
     95	/**
     96	 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
     97	 */
     98	struct amdgpu_vm *vm;
     99
    100	/**
    101	 * @cb: callback
    102	 */
    103	struct dma_fence_cb cb;
    104};
    105
    106/**
    107 * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
    108 *
    109 * @adev: amdgpu_device pointer
    110 * @vm: amdgpu_vm pointer
    111 * @pasid: the pasid the VM is using on this GPU
    112 *
    113 * Set the pasid this VM is using on this GPU; it can also be used to remove
    114 * the pasid by passing in zero.
    115 *
    116 */
    117int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
    118			u32 pasid)
    119{
    120	int r;
    121
    122	if (vm->pasid == pasid)
    123		return 0;
    124
    125	if (vm->pasid) {
    126		r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
    127		if (r < 0)
    128			return r;
    129
    130		vm->pasid = 0;
    131	}
    132
    133	if (pasid) {
    134		r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
    135					GFP_KERNEL));
    136		if (r < 0)
    137			return r;
    138
    139		vm->pasid = pasid;
    140	}
    141
    142
    143	return 0;
    144}
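
       /*
        * Typical usage sketch (illustrative only): bind a pasid while the VM is
        * in use on this GPU and drop the mapping again by passing zero:
        *
        *	r = amdgpu_vm_set_pasid(adev, vm, pasid);
        *	if (r)
        *		return r;
        *	...
        *	amdgpu_vm_set_pasid(adev, vm, 0);
        */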
    145
    146/*
    147 * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
    148 * happens while holding this lock anywhere to prevent deadlocks when
    149 * an MMU notifier runs in reclaim-FS context.
    150 */
    151static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
    152{
    153	mutex_lock(&vm->eviction_lock);
    154	vm->saved_flags = memalloc_noreclaim_save();
    155}
    156
    157static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
    158{
    159	if (mutex_trylock(&vm->eviction_lock)) {
    160		vm->saved_flags = memalloc_noreclaim_save();
    161		return 1;
    162	}
    163	return 0;
    164}
    165
    166static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
    167{
    168	memalloc_noreclaim_restore(vm->saved_flags);
    169	mutex_unlock(&vm->eviction_lock);
    170}
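
       /*
        * These helpers are used in the following pattern (see
        * amdgpu_vm_update_range() below for the real thing):
        *
        *	amdgpu_vm_eviction_lock(vm);
        *	if (vm->evicting) {
        *		r = -EBUSY;
        *		goto error;
        *	}
        *	... update page tables ...
        *	amdgpu_vm_eviction_unlock(vm);
        */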
    171
    172/**
    173 * amdgpu_vm_bo_evicted - vm_bo is evicted
    174 *
    175 * @vm_bo: vm_bo which is evicted
    176 *
    177 * State for PDs/PTs and per VM BOs which are not at the location they should
    178 * be.
    179 */
    180static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
    181{
    182	struct amdgpu_vm *vm = vm_bo->vm;
    183	struct amdgpu_bo *bo = vm_bo->bo;
    184
    185	vm_bo->moved = true;
    186	if (bo->tbo.type == ttm_bo_type_kernel)
    187		list_move(&vm_bo->vm_status, &vm->evicted);
    188	else
    189		list_move_tail(&vm_bo->vm_status, &vm->evicted);
    190}
    191/**
    192 * amdgpu_vm_bo_moved - vm_bo is moved
    193 *
    194 * @vm_bo: vm_bo which is moved
    195 *
    196 * State for per VM BOs which are moved, but that change is not yet reflected
    197 * in the page tables.
    198 */
    199static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
    200{
    201	list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
    202}
    203
    204/**
    205 * amdgpu_vm_bo_idle - vm_bo is idle
    206 *
    207 * @vm_bo: vm_bo which is now idle
    208 *
    209 * State for PDs/PTs and per VM BOs which have gone through the state machine
    210 * and are now idle.
    211 */
    212static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
    213{
    214	list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
    215	vm_bo->moved = false;
    216}
    217
    218/**
    219 * amdgpu_vm_bo_invalidated - vm_bo is invalidated
    220 *
    221 * @vm_bo: vm_bo which is now invalidated
    222 *
    223 * State for normal BOs which are invalidated and whose change is not yet
    224 * reflected in the PTs.
    225 */
    226static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
    227{
    228	spin_lock(&vm_bo->vm->invalidated_lock);
    229	list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
    230	spin_unlock(&vm_bo->vm->invalidated_lock);
    231}
    232
    233/**
    234 * amdgpu_vm_bo_relocated - vm_bo is relocated
    235 *
    236 * @vm_bo: vm_bo which is relocated
    237 *
    238 * State for PDs/PTs which need to update their parent PD.
    239 * For the root PD, just move to idle state.
    240 */
    241static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
    242{
    243	if (vm_bo->bo->parent)
    244		list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
    245	else
    246		amdgpu_vm_bo_idle(vm_bo);
    247}
    248
    249/**
    250 * amdgpu_vm_bo_done - vm_bo is done
    251 *
    252 * @vm_bo: vm_bo which is now done
    253 *
    254 * State for normal BOs which are invalidated and that change has been updated
    255 * in the PTs.
    256 */
    257static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
    258{
    259	spin_lock(&vm_bo->vm->invalidated_lock);
    260	list_move(&vm_bo->vm_status, &vm_bo->vm->done);
    261	spin_unlock(&vm_bo->vm->invalidated_lock);
    262}
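
       /*
        * Rough sketch of the vm_bo state machine implemented by the helpers
        * above:
        *
        *	evicted     PDs/PTs and per-VM BOs that must be validated again
        *	relocated   PDs/PTs whose parent PD still needs updating
        *	moved       per-VM BOs whose change is not yet in the page tables
        *	invalidated normal BOs whose change is not yet in the page tables
        *	idle        PDs/PTs and per-VM BOs that are fully up to date
        *	done        normal BOs whose change has been applied to the PTs
        */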
    263
    264/**
    265 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
    266 *
    267 * @base: base structure for tracking BO usage in a VM
    268 * @vm: vm to which bo is to be added
    269 * @bo: amdgpu buffer object
    270 *
    271 * Initialize a bo_va_base structure and add it to the appropriate lists
    272 *
    273 */
    274void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
    275			    struct amdgpu_vm *vm, struct amdgpu_bo *bo)
    276{
    277	base->vm = vm;
    278	base->bo = bo;
    279	base->next = NULL;
    280	INIT_LIST_HEAD(&base->vm_status);
    281
    282	if (!bo)
    283		return;
    284	base->next = bo->vm_bo;
    285	bo->vm_bo = base;
    286
    287	if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
    288		return;
    289
    290	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
    291
    292	ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
    293	if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
    294		amdgpu_vm_bo_relocated(base);
    295	else
    296		amdgpu_vm_bo_idle(base);
    297
    298	if (bo->preferred_domains &
    299	    amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
    300		return;
    301
    302	/*
    303	 * we checked all the prerequisites, but it looks like this per vm bo
    304	 * is currently evicted. add the bo to the evicted list to make sure it
    305	 * is validated on next vm use to avoid fault.
    306	 */
    307	amdgpu_vm_bo_evicted(base);
    308}
    309
    310/**
    311 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
    312 *
    313 * @vm: vm providing the BOs
    314 * @validated: head of validation list
    315 * @entry: entry to add
    316 *
    317 * Add the page directory to the list of BOs to
    318 * validate for command submission.
    319 */
    320void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
    321			 struct list_head *validated,
    322			 struct amdgpu_bo_list_entry *entry)
    323{
    324	entry->priority = 0;
    325	entry->tv.bo = &vm->root.bo->tbo;
    326	/* Two for VM updates, one for TTM and one for the CS job */
    327	entry->tv.num_shared = 4;
    328	entry->user_pages = NULL;
    329	list_add(&entry->tv.head, validated);
    330}
    331
    332/**
    333 * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
    334 *
    335 * @adev: amdgpu device pointer
    336 * @vm: vm providing the BOs
    337 *
    338 * Move all BOs to the end of LRU and remember their positions to put them
    339 * together.
    340 */
    341void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
    342				struct amdgpu_vm *vm)
    343{
    344	spin_lock(&adev->mman.bdev.lru_lock);
    345	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
    346	spin_unlock(&adev->mman.bdev.lru_lock);
    347}
    348
    349/**
    350 * amdgpu_vm_validate_pt_bos - validate the page table BOs
    351 *
    352 * @adev: amdgpu device pointer
    353 * @vm: vm providing the BOs
    354 * @validate: callback to do the validation
    355 * @param: parameter for the validation callback
    356 *
    358 * Validate the page table BOs on command submission if necessary.
    358 *
    359 * Returns:
    360 * Validation result.
    361 */
    362int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
    363			      int (*validate)(void *p, struct amdgpu_bo *bo),
    364			      void *param)
    365{
    366	struct amdgpu_vm_bo_base *bo_base, *tmp;
    367	int r;
    368
    369	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
    370		struct amdgpu_bo *bo = bo_base->bo;
    371		struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
    372
    373		r = validate(param, bo);
    374		if (r)
    375			return r;
    376		if (shadow) {
    377			r = validate(param, shadow);
    378			if (r)
    379				return r;
    380		}
    381
    382		if (bo->tbo.type != ttm_bo_type_kernel) {
    383			amdgpu_vm_bo_moved(bo_base);
    384		} else {
    385			vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
    386			amdgpu_vm_bo_relocated(bo_base);
    387		}
    388	}
    389
    390	amdgpu_vm_eviction_lock(vm);
    391	vm->evicting = false;
    392	amdgpu_vm_eviction_unlock(vm);
    393
    394	return 0;
    395}
    396
    397/**
    398 * amdgpu_vm_ready - check VM is ready for updates
    399 *
    400 * @vm: VM to check
    401 *
    402 * Check if all VM PDs/PTs are ready for updates
    403 *
    404 * Returns:
    405 * True if VM is not evicting.
    406 */
    407bool amdgpu_vm_ready(struct amdgpu_vm *vm)
    408{
    409	bool ret;
    410
    411	amdgpu_vm_eviction_lock(vm);
    412	ret = !vm->evicting;
    413	amdgpu_vm_eviction_unlock(vm);
    414
    415	return ret && list_empty(&vm->evicted);
    416}
    417
    418/**
    419 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
    420 *
    421 * @adev: amdgpu_device pointer
    422 */
    423void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
    424{
    425	const struct amdgpu_ip_block *ip_block;
    426	bool has_compute_vm_bug;
    427	struct amdgpu_ring *ring;
    428	int i;
    429
    430	has_compute_vm_bug = false;
    431
    432	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
    433	if (ip_block) {
    434		/* Compute has a VM bug for GFX version < 7.
    435		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
    436		if (ip_block->version->major <= 7)
    437			has_compute_vm_bug = true;
    438		else if (ip_block->version->major == 8)
    439			if (adev->gfx.mec_fw_version < 673)
    440				has_compute_vm_bug = true;
    441	}
    442
    443	for (i = 0; i < adev->num_rings; i++) {
    444		ring = adev->rings[i];
    445		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
    446			/* only compute rings */
    447			ring->has_compute_vm_bug = has_compute_vm_bug;
    448		else
    449			ring->has_compute_vm_bug = false;
    450	}
    451}
    452
    453/**
    454 * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
    455 *
    456 * @ring: ring on which the job will be submitted
    457 * @job: job to submit
    458 *
    459 * Returns:
    460 * True if sync is needed.
    461 */
    462bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
    463				  struct amdgpu_job *job)
    464{
    465	struct amdgpu_device *adev = ring->adev;
    466	unsigned vmhub = ring->funcs->vmhub;
    467	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
    468	struct amdgpu_vmid *id;
    469	bool gds_switch_needed;
    470	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
    471
    472	if (job->vmid == 0)
    473		return false;
    474	id = &id_mgr->ids[job->vmid];
    475	gds_switch_needed = ring->funcs->emit_gds_switch && (
    476		id->gds_base != job->gds_base ||
    477		id->gds_size != job->gds_size ||
    478		id->gws_base != job->gws_base ||
    479		id->gws_size != job->gws_size ||
    480		id->oa_base != job->oa_base ||
    481		id->oa_size != job->oa_size);
    482
    483	if (amdgpu_vmid_had_gpu_reset(adev, id))
    484		return true;
    485
    486	return vm_flush_needed || gds_switch_needed;
    487}
    488
    489/**
    490 * amdgpu_vm_flush - hardware flush the vm
    491 *
    492 * @ring: ring to use for flush
    493 * @job:  related job
    494 * @need_pipe_sync: is pipe sync needed
    495 *
    496 * Emit a VM flush when it is necessary.
    497 *
    498 * Returns:
    499 * 0 on success, errno otherwise.
    500 */
    501int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
    502		    bool need_pipe_sync)
    503{
    504	struct amdgpu_device *adev = ring->adev;
    505	unsigned vmhub = ring->funcs->vmhub;
    506	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
    507	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
    508	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
    509		id->gds_base != job->gds_base ||
    510		id->gds_size != job->gds_size ||
    511		id->gws_base != job->gws_base ||
    512		id->gws_size != job->gws_size ||
    513		id->oa_base != job->oa_base ||
    514		id->oa_size != job->oa_size);
    515	bool vm_flush_needed = job->vm_needs_flush;
    516	struct dma_fence *fence = NULL;
    517	bool pasid_mapping_needed = false;
    518	unsigned patch_offset = 0;
    519	bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
    520	int r;
    521
    522	if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
    523		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
    524
    525	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
    526		gds_switch_needed = true;
    527		vm_flush_needed = true;
    528		pasid_mapping_needed = true;
    529	}
    530
    531	mutex_lock(&id_mgr->lock);
    532	if (id->pasid != job->pasid || !id->pasid_mapping ||
    533	    !dma_fence_is_signaled(id->pasid_mapping))
    534		pasid_mapping_needed = true;
    535	mutex_unlock(&id_mgr->lock);
    536
    537	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
    538	vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
    539			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
    540	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
    541		ring->funcs->emit_wreg;
    542
    543	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
    544		return 0;
    545
    546	if (ring->funcs->init_cond_exec)
    547		patch_offset = amdgpu_ring_init_cond_exec(ring);
    548
    549	if (need_pipe_sync)
    550		amdgpu_ring_emit_pipeline_sync(ring);
    551
    552	if (vm_flush_needed) {
    553		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
    554		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
    555	}
    556
    557	if (pasid_mapping_needed)
    558		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
    559
    560	if (vm_flush_needed || pasid_mapping_needed) {
    561		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
    562		if (r)
    563			return r;
    564	}
    565
    566	if (vm_flush_needed) {
    567		mutex_lock(&id_mgr->lock);
    568		dma_fence_put(id->last_flush);
    569		id->last_flush = dma_fence_get(fence);
    570		id->current_gpu_reset_count =
    571			atomic_read(&adev->gpu_reset_counter);
    572		mutex_unlock(&id_mgr->lock);
    573	}
    574
    575	if (pasid_mapping_needed) {
    576		mutex_lock(&id_mgr->lock);
    577		id->pasid = job->pasid;
    578		dma_fence_put(id->pasid_mapping);
    579		id->pasid_mapping = dma_fence_get(fence);
    580		mutex_unlock(&id_mgr->lock);
    581	}
    582	dma_fence_put(fence);
    583
    584	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
    585	    gds_switch_needed) {
    586		id->gds_base = job->gds_base;
    587		id->gds_size = job->gds_size;
    588		id->gws_base = job->gws_base;
    589		id->gws_size = job->gws_size;
    590		id->oa_base = job->oa_base;
    591		id->oa_size = job->oa_size;
    592		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
    593					    job->gds_size, job->gws_base,
    594					    job->gws_size, job->oa_base,
    595					    job->oa_size);
    596	}
    597
    598	if (ring->funcs->patch_cond_exec)
    599		amdgpu_ring_patch_cond_exec(ring, patch_offset);
    600
    601	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
    602	if (ring->funcs->emit_switch_buffer) {
    603		amdgpu_ring_emit_switch_buffer(ring);
    604		amdgpu_ring_emit_switch_buffer(ring);
    605	}
    606	return 0;
    607}
    608
    609/**
    610 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
    611 *
    612 * @vm: requested vm
    613 * @bo: requested buffer object
    614 *
    615 * Find @bo inside the requested vm.
    617 * Search inside the @bo's vm list for the requested vm
    617 * Returns the found bo_va or NULL if none is found
    618 *
    619 * Object has to be reserved!
    620 *
    621 * Returns:
    622 * Found bo_va or NULL.
    623 */
    624struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
    625				       struct amdgpu_bo *bo)
    626{
    627	struct amdgpu_vm_bo_base *base;
    628
    629	for (base = bo->vm_bo; base; base = base->next) {
    630		if (base->vm != vm)
    631			continue;
    632
    633		return container_of(base, struct amdgpu_bo_va, base);
    634	}
    635	return NULL;
    636}
    637
    638/**
    639 * amdgpu_vm_map_gart - Resolve gart mapping of addr
    640 *
    641 * @pages_addr: optional DMA address to use for lookup
    642 * @addr: the unmapped addr
    643 *
    644 * Look up the physical address of the page that the pte resolves
    645 * to.
    646 *
    647 * Returns:
    648 * The pointer for the page table entry.
    649 */
    650uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
    651{
    652	uint64_t result;
    653
    654	/* page table offset */
    655	result = pages_addr[addr >> PAGE_SHIFT];
    656
    657	/* in case cpu page size != gpu page size*/
    658	result |= addr & (~PAGE_MASK);
    659
    660	result &= 0xFFFFFFFFFFFFF000ULL;
    661
    662	return result;
    663}
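
       /*
        * Worked example (illustrative, assuming 64K CPU pages and 4K GPU pages):
        * for addr = 0x12345, pages_addr[0x12345 >> 16] = pages_addr[1] gives the
        * DMA address of the CPU page, the in-page offset 0x2345 is OR'ed back in
        * and the final mask keeps only the 4K aligned part, i.e. dma + 0x2000.
        */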
    664
    665/**
    666 * amdgpu_vm_update_pdes - make sure that all directories are valid
    667 *
    668 * @adev: amdgpu_device pointer
    669 * @vm: requested vm
    670 * @immediate: submit immediately to the paging queue
    671 *
    672 * Makes sure all directories are up to date.
    673 *
    674 * Returns:
    675 * 0 for success, error for failure.
    676 */
    677int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
    678			  struct amdgpu_vm *vm, bool immediate)
    679{
    680	struct amdgpu_vm_update_params params;
    681	struct amdgpu_vm_bo_base *entry;
    682	bool flush_tlb_needed = false;
    683	int r, idx;
    684
    685	if (list_empty(&vm->relocated))
    686		return 0;
    687
    688	if (!drm_dev_enter(adev_to_drm(adev), &idx))
    689		return -ENODEV;
    690
    691	memset(&params, 0, sizeof(params));
    692	params.adev = adev;
    693	params.vm = vm;
    694	params.immediate = immediate;
    695
    696	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
    697	if (r)
    698		goto error;
    699
    700	list_for_each_entry(entry, &vm->relocated, vm_status) {
    701		/* vm_flush_needed after updating moved PDEs */
    702		flush_tlb_needed |= entry->moved;
    703
    704		r = amdgpu_vm_pde_update(&params, entry);
    705		if (r)
    706			goto error;
    707	}
    708
    709	r = vm->update_funcs->commit(&params, &vm->last_update);
    710	if (r)
    711		goto error;
    712
    713	if (flush_tlb_needed)
    714		atomic64_inc(&vm->tlb_seq);
    715
    716	while (!list_empty(&vm->relocated)) {
    717		entry = list_first_entry(&vm->relocated,
    718					 struct amdgpu_vm_bo_base,
    719					 vm_status);
    720		amdgpu_vm_bo_idle(entry);
    721	}
    722
    723error:
    724	drm_dev_exit(idx);
    725	return r;
    726}
    727
    728/**
    729 * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
    730 * @fence: unused
    731 * @cb: the callback structure
    732 *
    733 * Increments the tlb sequence to make sure that future CS execute a VM flush.
    734 */
    735static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
    736				 struct dma_fence_cb *cb)
    737{
    738	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
    739
    740	tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
    741	atomic64_inc(&tlb_cb->vm->tlb_seq);
    742	kfree(tlb_cb);
    743}
    744
    745/**
    746 * amdgpu_vm_update_range - update a range in the vm page table
    747 *
    748 * @adev: amdgpu_device pointer to use for commands
    749 * @vm: the VM to update the range
    750 * @immediate: immediate submission in a page fault
    751 * @unlocked: unlocked invalidation during MM callback
    752 * @flush_tlb: trigger tlb invalidation after update completed
    753 * @resv: fences we need to sync to
    754 * @start: start of mapped range
    755 * @last: last mapped entry
    756 * @flags: flags for the entries
    757 * @offset: offset into nodes and pages_addr
    758 * @vram_base: base for vram mappings
    759 * @res: ttm_resource to map
    760 * @pages_addr: DMA addresses to use for mapping
    761 * @fence: optional resulting fence
    762 *
    763 * Fill in the page table entries between @start and @last.
    764 *
    765 * Returns:
    767 * 0 for success, negative error code for failure.
    767 */
    768int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
    769			   bool immediate, bool unlocked, bool flush_tlb,
    770			   struct dma_resv *resv, uint64_t start, uint64_t last,
    771			   uint64_t flags, uint64_t offset, uint64_t vram_base,
    772			   struct ttm_resource *res, dma_addr_t *pages_addr,
    773			   struct dma_fence **fence)
    774{
    775	struct amdgpu_vm_update_params params;
    776	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
    777	struct amdgpu_res_cursor cursor;
    778	enum amdgpu_sync_mode sync_mode;
    779	int r, idx;
    780
    781	if (!drm_dev_enter(adev_to_drm(adev), &idx))
    782		return -ENODEV;
    783
    784	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
    785	if (!tlb_cb) {
    786		r = -ENOMEM;
    787		goto error_unlock;
    788	}
    789
    790	/* On Vega20 with XGMI, PTEs get inadvertently cached in the L2 texture
    791	 * cache, so do a heavy-weight TLB flush unconditionally.
    792	 */
    793	flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
    794		     adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
    795
    796	/*
    797	 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
    798	 */
    799	flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
    800
    801	memset(&params, 0, sizeof(params));
    802	params.adev = adev;
    803	params.vm = vm;
    804	params.immediate = immediate;
    805	params.pages_addr = pages_addr;
    806	params.unlocked = unlocked;
    807
    808	/* Implicitly sync to command submissions in the same VM before
    809	 * unmapping. Sync to moving fences before mapping.
    810	 */
    811	if (!(flags & AMDGPU_PTE_VALID))
    812		sync_mode = AMDGPU_SYNC_EQ_OWNER;
    813	else
    814		sync_mode = AMDGPU_SYNC_EXPLICIT;
    815
    816	amdgpu_vm_eviction_lock(vm);
    817	if (vm->evicting) {
    818		r = -EBUSY;
    819		goto error_free;
    820	}
    821
    822	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
    823		struct dma_fence *tmp = dma_fence_get_stub();
    824
    825		amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
    826		swap(vm->last_unlocked, tmp);
    827		dma_fence_put(tmp);
    828	}
    829
    830	r = vm->update_funcs->prepare(&params, resv, sync_mode);
    831	if (r)
    832		goto error_free;
    833
    834	amdgpu_res_first(pages_addr ? NULL : res, offset,
    835			 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
    836	while (cursor.remaining) {
    837		uint64_t tmp, num_entries, addr;
    838
    839		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
    840		if (pages_addr) {
    841			bool contiguous = true;
    842
    843			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
    844				uint64_t pfn = cursor.start >> PAGE_SHIFT;
    845				uint64_t count;
    846
    847				contiguous = pages_addr[pfn + 1] ==
    848					pages_addr[pfn] + PAGE_SIZE;
    849
    850				tmp = num_entries /
    851					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
    852				for (count = 2; count < tmp; ++count) {
    853					uint64_t idx = pfn + count;
    854
    855					if (contiguous != (pages_addr[idx] ==
    856					    pages_addr[idx - 1] + PAGE_SIZE))
    857						break;
    858				}
    859				num_entries = count *
    860					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
    861			}
    862
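       			/*
       			 * num_entries now covers the longest run of CPU pages
       			 * that are either all contiguous or all scattered, so
       			 * the code below can pick between a linear DMA base
       			 * address and the per-page lookup via params.pages_addr.
       			 */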
    863			if (!contiguous) {
    864				addr = cursor.start;
    865				params.pages_addr = pages_addr;
    866			} else {
    867				addr = pages_addr[cursor.start >> PAGE_SHIFT];
    868				params.pages_addr = NULL;
    869			}
    870
    871		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
    872			addr = vram_base + cursor.start;
    873		} else {
    874			addr = 0;
    875		}
    876
    877		tmp = start + num_entries;
    878		r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
    879		if (r)
    880			goto error_free;
    881
    882		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
    883		start = tmp;
    884	}
    885
    886	r = vm->update_funcs->commit(&params, fence);
    887
    888	if (flush_tlb || params.table_freed) {
    889		tlb_cb->vm = vm;
    890		if (fence && *fence &&
    891		    !dma_fence_add_callback(*fence, &tlb_cb->cb,
    892					   amdgpu_vm_tlb_seq_cb)) {
    893			dma_fence_put(vm->last_tlb_flush);
    894			vm->last_tlb_flush = dma_fence_get(*fence);
    895		} else {
    896			amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
    897		}
    898		tlb_cb = NULL;
    899	}
    900
    901error_free:
    902	kfree(tlb_cb);
    903
    904error_unlock:
    905	amdgpu_vm_eviction_unlock(vm);
    906	drm_dev_exit(idx);
    907	return r;
    908}
    909
    910void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
    911				uint64_t *gtt_mem, uint64_t *cpu_mem)
    912{
    913	struct amdgpu_bo_va *bo_va, *tmp;
    914
    915	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
    916		if (!bo_va->base.bo)
    917			continue;
    918		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
    919				gtt_mem, cpu_mem);
    920	}
    921	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
    922		if (!bo_va->base.bo)
    923			continue;
    924		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
    925				gtt_mem, cpu_mem);
    926	}
    927	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
    928		if (!bo_va->base.bo)
    929			continue;
    930		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
    931				gtt_mem, cpu_mem);
    932	}
    933	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
    934		if (!bo_va->base.bo)
    935			continue;
    936		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
    937				gtt_mem, cpu_mem);
    938	}
    939	spin_lock(&vm->invalidated_lock);
    940	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
    941		if (!bo_va->base.bo)
    942			continue;
    943		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
    944				gtt_mem, cpu_mem);
    945	}
    946	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
    947		if (!bo_va->base.bo)
    948			continue;
    949		amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
    950				gtt_mem, cpu_mem);
    951	}
    952	spin_unlock(&vm->invalidated_lock);
    953}
    954/**
    955 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
    956 *
    957 * @adev: amdgpu_device pointer
    958 * @bo_va: requested BO and VM object
    959 * @clear: if true clear the entries
    960 *
    961 * Fill in the page table entries for @bo_va.
    962 *
    963 * Returns:
    964 * 0 for success, -EINVAL for failure.
    965 */
    966int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
    967			bool clear)
    968{
    969	struct amdgpu_bo *bo = bo_va->base.bo;
    970	struct amdgpu_vm *vm = bo_va->base.vm;
    971	struct amdgpu_bo_va_mapping *mapping;
    972	dma_addr_t *pages_addr = NULL;
    973	struct ttm_resource *mem;
    974	struct dma_fence **last_update;
    975	bool flush_tlb = clear;
    976	struct dma_resv *resv;
    977	uint64_t vram_base;
    978	uint64_t flags;
    979	int r;
    980
    981	if (clear || !bo) {
    982		mem = NULL;
    983		resv = vm->root.bo->tbo.base.resv;
    984	} else {
    985		struct drm_gem_object *obj = &bo->tbo.base;
    986
    987		resv = bo->tbo.base.resv;
    988		if (obj->import_attach && bo_va->is_xgmi) {
    989			struct dma_buf *dma_buf = obj->import_attach->dmabuf;
    990			struct drm_gem_object *gobj = dma_buf->priv;
    991			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
    992
    993			if (abo->tbo.resource->mem_type == TTM_PL_VRAM)
    994				bo = gem_to_amdgpu_bo(gobj);
    995		}
    996		mem = bo->tbo.resource;
    997		if (mem->mem_type == TTM_PL_TT ||
    998		    mem->mem_type == AMDGPU_PL_PREEMPT)
    999			pages_addr = bo->tbo.ttm->dma_address;
   1000	}
   1001
   1002	if (bo) {
   1003		struct amdgpu_device *bo_adev;
   1004
   1005		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
   1006
   1007		if (amdgpu_bo_encrypted(bo))
   1008			flags |= AMDGPU_PTE_TMZ;
   1009
   1010		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
   1011		vram_base = bo_adev->vm_manager.vram_base_offset;
   1012	} else {
   1013		flags = 0x0;
   1014		vram_base = 0;
   1015	}
   1016
   1017	if (clear || (bo && bo->tbo.base.resv ==
   1018		      vm->root.bo->tbo.base.resv))
   1019		last_update = &vm->last_update;
   1020	else
   1021		last_update = &bo_va->last_pt_update;
   1022
   1023	if (!clear && bo_va->base.moved) {
   1024		flush_tlb = true;
   1025		list_splice_init(&bo_va->valids, &bo_va->invalids);
   1026
   1027	} else if (bo_va->cleared != clear) {
   1028		list_splice_init(&bo_va->valids, &bo_va->invalids);
   1029	}
   1030
   1031	list_for_each_entry(mapping, &bo_va->invalids, list) {
   1032		uint64_t update_flags = flags;
   1033
   1034		/* Normally, bo_va->flags only contains the READABLE and WRITEABLE
   1035		 * bits here, but just in case we filter the flags first.
   1036		 */
   1037		if (!(mapping->flags & AMDGPU_PTE_READABLE))
   1038			update_flags &= ~AMDGPU_PTE_READABLE;
   1039		if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
   1040			update_flags &= ~AMDGPU_PTE_WRITEABLE;
   1041
   1042		/* Apply ASIC specific mapping flags */
   1043		amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
   1044
   1045		trace_amdgpu_vm_bo_update(mapping);
   1046
   1047		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
   1048					   resv, mapping->start, mapping->last,
   1049					   update_flags, mapping->offset,
   1050					   vram_base, mem, pages_addr,
   1051					   last_update);
   1052		if (r)
   1053			return r;
   1054	}
   1055
   1056	/* If the BO is not in its preferred location add it back to
   1057	 * the evicted list so that it gets validated again on the
   1058	 * next command submission.
   1059	 */
   1060	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
   1061		uint32_t mem_type = bo->tbo.resource->mem_type;
   1062
   1063		if (!(bo->preferred_domains &
   1064		      amdgpu_mem_type_to_domain(mem_type)))
   1065			amdgpu_vm_bo_evicted(&bo_va->base);
   1066		else
   1067			amdgpu_vm_bo_idle(&bo_va->base);
   1068	} else {
   1069		amdgpu_vm_bo_done(&bo_va->base);
   1070	}
   1071
   1072	list_splice_init(&bo_va->invalids, &bo_va->valids);
   1073	bo_va->cleared = clear;
   1074	bo_va->base.moved = false;
   1075
   1076	if (trace_amdgpu_vm_bo_mapping_enabled()) {
   1077		list_for_each_entry(mapping, &bo_va->valids, list)
   1078			trace_amdgpu_vm_bo_mapping(mapping);
   1079	}
   1080
   1081	return 0;
   1082}
   1083
   1084/**
   1085 * amdgpu_vm_update_prt_state - update the global PRT state
   1086 *
   1087 * @adev: amdgpu_device pointer
   1088 */
   1089static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
   1090{
   1091	unsigned long flags;
   1092	bool enable;
   1093
   1094	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
   1095	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
   1096	adev->gmc.gmc_funcs->set_prt(adev, enable);
   1097	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
   1098}
   1099
   1100/**
   1101 * amdgpu_vm_prt_get - add a PRT user
   1102 *
   1103 * @adev: amdgpu_device pointer
   1104 */
   1105static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
   1106{
   1107	if (!adev->gmc.gmc_funcs->set_prt)
   1108		return;
   1109
   1110	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
   1111		amdgpu_vm_update_prt_state(adev);
   1112}
   1113
   1114/**
   1115 * amdgpu_vm_prt_put - drop a PRT user
   1116 *
   1117 * @adev: amdgpu_device pointer
   1118 */
   1119static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
   1120{
   1121	if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
   1122		amdgpu_vm_update_prt_state(adev);
   1123}
   1124
   1125/**
   1126 * amdgpu_vm_prt_cb - callback for updating the PRT status
   1127 *
   1128 * @fence: fence for the callback
   1129 * @_cb: the callback function
   1130 */
   1131static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
   1132{
   1133	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
   1134
   1135	amdgpu_vm_prt_put(cb->adev);
   1136	kfree(cb);
   1137}
   1138
   1139/**
   1140 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
   1141 *
   1142 * @adev: amdgpu_device pointer
   1143 * @fence: fence for the callback
   1144 */
   1145static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
   1146				 struct dma_fence *fence)
   1147{
   1148	struct amdgpu_prt_cb *cb;
   1149
   1150	if (!adev->gmc.gmc_funcs->set_prt)
   1151		return;
   1152
   1153	cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
   1154	if (!cb) {
   1155		/* Last resort when we are OOM */
   1156		if (fence)
   1157			dma_fence_wait(fence, false);
   1158
   1159		amdgpu_vm_prt_put(adev);
   1160	} else {
   1161		cb->adev = adev;
   1162		if (!fence || dma_fence_add_callback(fence, &cb->cb,
   1163						     amdgpu_vm_prt_cb))
   1164			amdgpu_vm_prt_cb(fence, &cb->cb);
   1165	}
   1166}
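
       /*
        * In short: PRT support is reference counted per device.
        * amdgpu_vm_prt_get() enables it for the first user, amdgpu_vm_prt_put()
        * disables it again when the last user goes away, and
        * amdgpu_vm_add_prt_cb() defers the put until the given fence has
        * signaled.
        */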
   1167
   1168/**
   1169 * amdgpu_vm_free_mapping - free a mapping
   1170 *
   1171 * @adev: amdgpu_device pointer
   1172 * @vm: requested vm
   1173 * @mapping: mapping to be freed
   1174 * @fence: fence of the unmap operation
   1175 *
   1176 * Free a mapping and make sure we decrease the PRT usage count if applicable.
   1177 */
   1178static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
   1179				   struct amdgpu_vm *vm,
   1180				   struct amdgpu_bo_va_mapping *mapping,
   1181				   struct dma_fence *fence)
   1182{
   1183	if (mapping->flags & AMDGPU_PTE_PRT)
   1184		amdgpu_vm_add_prt_cb(adev, fence);
   1185	kfree(mapping);
   1186}
   1187
   1188/**
   1189 * amdgpu_vm_prt_fini - finish all prt mappings
   1190 *
   1191 * @adev: amdgpu_device pointer
   1192 * @vm: requested vm
   1193 *
   1194 * Register a cleanup callback to disable PRT support after VM dies.
   1195 */
   1196static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
   1197{
   1198	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
   1199	struct dma_resv_iter cursor;
   1200	struct dma_fence *fence;
   1201
   1202	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
   1203		/* Add a callback for each fence in the reservation object */
   1204		amdgpu_vm_prt_get(adev);
   1205		amdgpu_vm_add_prt_cb(adev, fence);
   1206	}
   1207}
   1208
   1209/**
   1210 * amdgpu_vm_clear_freed - clear freed BOs in the PT
   1211 *
   1212 * @adev: amdgpu_device pointer
   1213 * @vm: requested vm
   1214 * @fence: optional resulting fence (unchanged if no work needed to be done
   1215 * or if an error occurred)
   1216 *
   1217 * Make sure all freed BOs are cleared in the PT.
   1218 * PTs have to be reserved and mutex must be locked!
   1219 *
   1220 * Returns:
   1221 * 0 for success.
   1222 *
   1223 */
   1224int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
   1225			  struct amdgpu_vm *vm,
   1226			  struct dma_fence **fence)
   1227{
   1228	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
   1229	struct amdgpu_bo_va_mapping *mapping;
   1230	uint64_t init_pte_value = 0;
   1231	struct dma_fence *f = NULL;
   1232	int r;
   1233
   1234	while (!list_empty(&vm->freed)) {
   1235		mapping = list_first_entry(&vm->freed,
   1236			struct amdgpu_bo_va_mapping, list);
   1237		list_del(&mapping->list);
   1238
   1239		if (vm->pte_support_ats &&
   1240		    mapping->start < AMDGPU_GMC_HOLE_START)
   1241			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
   1242
   1243		r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
   1244					   mapping->start, mapping->last,
   1245					   init_pte_value, 0, 0, NULL, NULL,
   1246					   &f);
   1247		amdgpu_vm_free_mapping(adev, vm, mapping, f);
   1248		if (r) {
   1249			dma_fence_put(f);
   1250			return r;
   1251		}
   1252	}
   1253
   1254	if (fence && f) {
   1255		dma_fence_put(*fence);
   1256		*fence = f;
   1257	} else {
   1258		dma_fence_put(f);
   1259	}
   1260
   1261	return 0;
   1262
   1263}
   1264
   1265/**
   1266 * amdgpu_vm_handle_moved - handle moved BOs in the PT
   1267 *
   1268 * @adev: amdgpu_device pointer
   1269 * @vm: requested vm
   1270 *
   1271 * Make sure all BOs which are moved are updated in the PTs.
   1272 *
   1273 * Returns:
   1274 * 0 for success.
   1275 *
   1276 * PTs have to be reserved!
   1277 */
   1278int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
   1279			   struct amdgpu_vm *vm)
   1280{
   1281	struct amdgpu_bo_va *bo_va, *tmp;
   1282	struct dma_resv *resv;
   1283	bool clear;
   1284	int r;
   1285
   1286	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
   1287		/* Per VM BOs never need to be cleared in the page tables */
   1288		r = amdgpu_vm_bo_update(adev, bo_va, false);
   1289		if (r)
   1290			return r;
   1291	}
   1292
   1293	spin_lock(&vm->invalidated_lock);
   1294	while (!list_empty(&vm->invalidated)) {
   1295		bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
   1296					 base.vm_status);
   1297		resv = bo_va->base.bo->tbo.base.resv;
   1298		spin_unlock(&vm->invalidated_lock);
   1299
   1300		/* Try to reserve the BO to avoid clearing its ptes */
   1301		if (!amdgpu_vm_debug && dma_resv_trylock(resv))
   1302			clear = false;
   1303		/* Somebody else is using the BO right now */
   1304		else
   1305			clear = true;
   1306
   1307		r = amdgpu_vm_bo_update(adev, bo_va, clear);
   1308		if (r)
   1309			return r;
   1310
   1311		if (!clear)
   1312			dma_resv_unlock(resv);
   1313		spin_lock(&vm->invalidated_lock);
   1314	}
   1315	spin_unlock(&vm->invalidated_lock);
   1316
   1317	return 0;
   1318}
   1319
   1320/**
   1321 * amdgpu_vm_bo_add - add a bo to a specific vm
   1322 *
   1323 * @adev: amdgpu_device pointer
   1324 * @vm: requested vm
   1325 * @bo: amdgpu buffer object
   1326 *
   1327 * Add @bo into the requested vm.
   1328 * Add @bo to the list of bos associated with the vm
   1329 *
   1330 * Returns:
   1331 * Newly added bo_va or NULL for failure
   1332 *
   1333 * Object has to be reserved!
   1334 */
   1335struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
   1336				      struct amdgpu_vm *vm,
   1337				      struct amdgpu_bo *bo)
   1338{
   1339	struct amdgpu_bo_va *bo_va;
   1340
   1341	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
   1342	if (bo_va == NULL) {
   1343		return NULL;
   1344	}
   1345	amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
   1346
   1347	bo_va->ref_count = 1;
   1348	INIT_LIST_HEAD(&bo_va->valids);
   1349	INIT_LIST_HEAD(&bo_va->invalids);
   1350
   1351	if (!bo)
   1352		return bo_va;
   1353
   1354	dma_resv_assert_held(bo->tbo.base.resv);
   1355	if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
   1356		bo_va->is_xgmi = true;
   1357		/* Power up XGMI if it can be potentially used */
   1358		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
   1359	}
   1360
   1361	return bo_va;
   1362}
   1363
   1364
   1365/**
   1366 * amdgpu_vm_bo_insert_map - insert a new mapping
   1367 *
   1368 * @adev: amdgpu_device pointer
   1369 * @bo_va: bo_va to store the address
   1370 * @mapping: the mapping to insert
   1371 *
   1372 * Insert a new mapping into all structures.
   1373 */
   1374static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
   1375				    struct amdgpu_bo_va *bo_va,
   1376				    struct amdgpu_bo_va_mapping *mapping)
   1377{
   1378	struct amdgpu_vm *vm = bo_va->base.vm;
   1379	struct amdgpu_bo *bo = bo_va->base.bo;
   1380
   1381	mapping->bo_va = bo_va;
   1382	list_add(&mapping->list, &bo_va->invalids);
   1383	amdgpu_vm_it_insert(mapping, &vm->va);
   1384
   1385	if (mapping->flags & AMDGPU_PTE_PRT)
   1386		amdgpu_vm_prt_get(adev);
   1387
   1388	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
   1389	    !bo_va->base.moved) {
   1390		list_move(&bo_va->base.vm_status, &vm->moved);
   1391	}
   1392	trace_amdgpu_vm_bo_map(bo_va, mapping);
   1393}
   1394
   1395/**
   1396 * amdgpu_vm_bo_map - map bo inside a vm
   1397 *
   1398 * @adev: amdgpu_device pointer
   1399 * @bo_va: bo_va to store the address
   1400 * @saddr: where to map the BO
   1401 * @offset: requested offset in the BO
   1402 * @size: BO size in bytes
   1403 * @flags: attributes of pages (read/write/valid/etc.)
   1404 *
   1405 * Add a mapping of the BO at the specified addr into the VM.
   1406 *
   1407 * Returns:
   1408 * 0 for success, error for failure.
   1409 *
   1410 * Object has to be reserved and unreserved outside!
   1411 */
   1412int amdgpu_vm_bo_map(struct amdgpu_device *adev,
   1413		     struct amdgpu_bo_va *bo_va,
   1414		     uint64_t saddr, uint64_t offset,
   1415		     uint64_t size, uint64_t flags)
   1416{
   1417	struct amdgpu_bo_va_mapping *mapping, *tmp;
   1418	struct amdgpu_bo *bo = bo_va->base.bo;
   1419	struct amdgpu_vm *vm = bo_va->base.vm;
   1420	uint64_t eaddr;
   1421
   1422	/* validate the parameters */
   1423	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
   1424	    size == 0 || size & ~PAGE_MASK)
   1425		return -EINVAL;
   1426
   1427	/* make sure object fit at this offset */
   1428	eaddr = saddr + size - 1;
   1429	if (saddr >= eaddr ||
   1430	    (bo && offset + size > amdgpu_bo_size(bo)) ||
   1431	    (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
   1432		return -EINVAL;
   1433
   1434	saddr /= AMDGPU_GPU_PAGE_SIZE;
   1435	eaddr /= AMDGPU_GPU_PAGE_SIZE;
   1436
   1437	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
   1438	if (tmp) {
   1439		/* bo and tmp overlap, invalid addr */
   1440		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
   1441			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
   1442			tmp->start, tmp->last + 1);
   1443		return -EINVAL;
   1444	}
   1445
   1446	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
   1447	if (!mapping)
   1448		return -ENOMEM;
   1449
   1450	mapping->start = saddr;
   1451	mapping->last = eaddr;
   1452	mapping->offset = offset;
   1453	mapping->flags = flags;
   1454
   1455	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
   1456
   1457	return 0;
   1458}
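
       /*
        * Usage sketch (illustrative only): with the BO and VM reserved, map the
        * first size bytes of the BO at GPU address saddr (both page aligned):
        *
        *	r = amdgpu_vm_bo_map(adev, bo_va, saddr, 0, size,
        *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
        */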
   1459
   1460/**
   1461 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
   1462 *
   1463 * @adev: amdgpu_device pointer
   1464 * @bo_va: bo_va to store the address
   1465 * @saddr: where to map the BO
   1466 * @offset: requested offset in the BO
   1467 * @size: BO size in bytes
   1468 * @flags: attributes of pages (read/write/valid/etc.)
   1469 *
   1470 * Add a mapping of the BO at the specified addr into the VM. Replace existing
   1471 * mappings as we do so.
   1472 *
   1473 * Returns:
   1474 * 0 for success, error for failure.
   1475 *
   1476 * Object has to be reserved and unreserved outside!
   1477 */
   1478int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
   1479			     struct amdgpu_bo_va *bo_va,
   1480			     uint64_t saddr, uint64_t offset,
   1481			     uint64_t size, uint64_t flags)
   1482{
   1483	struct amdgpu_bo_va_mapping *mapping;
   1484	struct amdgpu_bo *bo = bo_va->base.bo;
   1485	uint64_t eaddr;
   1486	int r;
   1487
   1488	/* validate the parameters */
   1489	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
   1490	    size == 0 || size & ~PAGE_MASK)
   1491		return -EINVAL;
   1492
   1493	/* make sure object fit at this offset */
   1494	eaddr = saddr + size - 1;
   1495	if (saddr >= eaddr ||
   1496	    (bo && offset + size > amdgpu_bo_size(bo)) ||
   1497	    (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
   1498		return -EINVAL;
   1499
   1500	/* Allocate all the needed memory */
   1501	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
   1502	if (!mapping)
   1503		return -ENOMEM;
   1504
   1505	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
   1506	if (r) {
   1507		kfree(mapping);
   1508		return r;
   1509	}
   1510
   1511	saddr /= AMDGPU_GPU_PAGE_SIZE;
   1512	eaddr /= AMDGPU_GPU_PAGE_SIZE;
   1513
   1514	mapping->start = saddr;
   1515	mapping->last = eaddr;
   1516	mapping->offset = offset;
   1517	mapping->flags = flags;
   1518
   1519	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
   1520
   1521	return 0;
   1522}
   1523
   1524/**
   1525 * amdgpu_vm_bo_unmap - remove bo mapping from vm
   1526 *
   1527 * @adev: amdgpu_device pointer
   1528 * @bo_va: bo_va to remove the address from
   1529 * @saddr: where the BO is mapped
   1530 *
   1531 * Remove a mapping of the BO at the specified addr from the VM.
   1532 *
   1533 * Returns:
   1534 * 0 for success, error for failure.
   1535 *
   1536 * Object has to be reserved and unreserved outside!
   1537 */
   1538int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
   1539		       struct amdgpu_bo_va *bo_va,
   1540		       uint64_t saddr)
   1541{
   1542	struct amdgpu_bo_va_mapping *mapping;
   1543	struct amdgpu_vm *vm = bo_va->base.vm;
   1544	bool valid = true;
   1545
   1546	saddr /= AMDGPU_GPU_PAGE_SIZE;
   1547
   1548	list_for_each_entry(mapping, &bo_va->valids, list) {
   1549		if (mapping->start == saddr)
   1550			break;
   1551	}
   1552
   1553	if (&mapping->list == &bo_va->valids) {
   1554		valid = false;
   1555
   1556		list_for_each_entry(mapping, &bo_va->invalids, list) {
   1557			if (mapping->start == saddr)
   1558				break;
   1559		}
   1560
   1561		if (&mapping->list == &bo_va->invalids)
   1562			return -ENOENT;
   1563	}
   1564
   1565	list_del(&mapping->list);
   1566	amdgpu_vm_it_remove(mapping, &vm->va);
   1567	mapping->bo_va = NULL;
   1568	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
   1569
   1570	if (valid)
   1571		list_add(&mapping->list, &vm->freed);
   1572	else
   1573		amdgpu_vm_free_mapping(adev, vm, mapping,
   1574				       bo_va->last_pt_update);
   1575
   1576	return 0;
   1577}
   1578
   1579/**
   1580 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
   1581 *
   1582 * @adev: amdgpu_device pointer
   1583 * @vm: VM structure to use
   1584 * @saddr: start of the range
   1585 * @size: size of the range
   1586 *
   1587 * Remove all mappings in a range, splitting them as appropriate.
   1588 *
   1589 * Returns:
   1590 * 0 for success, error for failure.
   1591 */
   1592int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
   1593				struct amdgpu_vm *vm,
   1594				uint64_t saddr, uint64_t size)
   1595{
   1596	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
   1597	LIST_HEAD(removed);
   1598	uint64_t eaddr;
   1599
   1600	eaddr = saddr + size - 1;
   1601	saddr /= AMDGPU_GPU_PAGE_SIZE;
   1602	eaddr /= AMDGPU_GPU_PAGE_SIZE;
   1603
   1604	/* Allocate all the needed memory */
   1605	before = kzalloc(sizeof(*before), GFP_KERNEL);
   1606	if (!before)
   1607		return -ENOMEM;
   1608	INIT_LIST_HEAD(&before->list);
   1609
   1610	after = kzalloc(sizeof(*after), GFP_KERNEL);
   1611	if (!after) {
   1612		kfree(before);
   1613		return -ENOMEM;
   1614	}
   1615	INIT_LIST_HEAD(&after->list);
   1616
   1617	/* Now gather all removed mappings */
   1618	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
   1619	while (tmp) {
   1620		/* Remember mapping split at the start */
   1621		if (tmp->start < saddr) {
   1622			before->start = tmp->start;
   1623			before->last = saddr - 1;
   1624			before->offset = tmp->offset;
   1625			before->flags = tmp->flags;
   1626			before->bo_va = tmp->bo_va;
   1627			list_add(&before->list, &tmp->bo_va->invalids);
   1628		}
   1629
   1630		/* Remember mapping split at the end */
   1631		if (tmp->last > eaddr) {
   1632			after->start = eaddr + 1;
   1633			after->last = tmp->last;
   1634			after->offset = tmp->offset;
   1635			after->offset += (after->start - tmp->start) << PAGE_SHIFT;
   1636			after->flags = tmp->flags;
   1637			after->bo_va = tmp->bo_va;
   1638			list_add(&after->list, &tmp->bo_va->invalids);
   1639		}
   1640
   1641		list_del(&tmp->list);
   1642		list_add(&tmp->list, &removed);
   1643
   1644		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
   1645	}
   1646
   1647	/* And free them up */
   1648	list_for_each_entry_safe(tmp, next, &removed, list) {
   1649		amdgpu_vm_it_remove(tmp, &vm->va);
   1650		list_del(&tmp->list);
   1651
   1652		if (tmp->start < saddr)
   1653		    tmp->start = saddr;
   1654		if (tmp->last > eaddr)
   1655		    tmp->last = eaddr;
   1656
   1657		tmp->bo_va = NULL;
   1658		list_add(&tmp->list, &vm->freed);
   1659		trace_amdgpu_vm_bo_unmap(NULL, tmp);
   1660	}
   1661
   1662	/* Insert partial mapping before the range */
   1663	if (!list_empty(&before->list)) {
   1664		amdgpu_vm_it_insert(before, &vm->va);
   1665		if (before->flags & AMDGPU_PTE_PRT)
   1666			amdgpu_vm_prt_get(adev);
   1667	} else {
   1668		kfree(before);
   1669	}
   1670
   1671	/* Insert partial mapping after the range */
   1672	if (!list_empty(&after->list)) {
   1673		amdgpu_vm_it_insert(after, &vm->va);
   1674		if (after->flags & AMDGPU_PTE_PRT)
   1675			amdgpu_vm_prt_get(adev);
   1676	} else {
   1677		kfree(after);
   1678	}
   1679
   1680	return 0;
   1681}
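
       /*
        * Worked example (illustrative): if a single mapping covers GPU pages
        * [0x1000, 0x4fff] and the range [0x2000, 0x2fff] is cleared, the "before"
        * remainder [0x1000, 0x1fff] and the "after" remainder [0x3000, 0x4fff]
        * stay mapped while the clipped middle part is moved to the freed list.
        */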
   1682
   1683/**
   1684 * amdgpu_vm_bo_lookup_mapping - find mapping by address
   1685 *
   1686 * @vm: the requested VM
   1687 * @addr: the address
   1688 *
   1689 * Find a mapping by its address.
   1690 *
   1691 * Returns:
   1692 * The amdgpu_bo_va_mapping matching for addr or NULL
   1693 *
   1694 */
   1695struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
   1696							 uint64_t addr)
   1697{
   1698	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
   1699}
   1700
   1701/**
   1702 * amdgpu_vm_bo_trace_cs - trace all reserved mappings
   1703 *
   1704 * @vm: the requested vm
   1705 * @ticket: CS ticket
   1706 *
   1707 * Trace all mappings of BOs reserved during a command submission.
   1708 */
   1709void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
   1710{
   1711	struct amdgpu_bo_va_mapping *mapping;
   1712
   1713	if (!trace_amdgpu_vm_bo_cs_enabled())
   1714		return;
   1715
   1716	for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
   1717	     mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
   1718		if (mapping->bo_va && mapping->bo_va->base.bo) {
   1719			struct amdgpu_bo *bo;
   1720
   1721			bo = mapping->bo_va->base.bo;
   1722			if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
   1723			    ticket)
   1724				continue;
   1725		}
   1726
   1727		trace_amdgpu_vm_bo_cs(mapping);
   1728	}
   1729}
   1730
   1731/**
   1732 * amdgpu_vm_bo_del - remove a bo from a specific vm
   1733 *
   1734 * @adev: amdgpu_device pointer
   1735 * @bo_va: requested bo_va
   1736 *
   1737 * Remove @bo_va->bo from the requested vm.
   1738 *
   1739 * Object has to be reserved!
   1740 */
   1741void amdgpu_vm_bo_del(struct amdgpu_device *adev,
   1742		      struct amdgpu_bo_va *bo_va)
   1743{
   1744	struct amdgpu_bo_va_mapping *mapping, *next;
   1745	struct amdgpu_bo *bo = bo_va->base.bo;
   1746	struct amdgpu_vm *vm = bo_va->base.vm;
   1747	struct amdgpu_vm_bo_base **base;
   1748
   1749	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
   1750
   1751	if (bo) {
   1752		dma_resv_assert_held(bo->tbo.base.resv);
   1753		if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
   1754			ttm_bo_set_bulk_move(&bo->tbo, NULL);
   1755
   1756		for (base = &bo_va->base.bo->vm_bo; *base;
   1757		     base = &(*base)->next) {
   1758			if (*base != &bo_va->base)
   1759				continue;
   1760
   1761			*base = bo_va->base.next;
   1762			break;
   1763		}
   1764	}
   1765
   1766	spin_lock(&vm->invalidated_lock);
   1767	list_del(&bo_va->base.vm_status);
   1768	spin_unlock(&vm->invalidated_lock);
   1769
   1770	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
   1771		list_del(&mapping->list);
   1772		amdgpu_vm_it_remove(mapping, &vm->va);
   1773		mapping->bo_va = NULL;
   1774		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
   1775		list_add(&mapping->list, &vm->freed);
   1776	}
   1777	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
   1778		list_del(&mapping->list);
   1779		amdgpu_vm_it_remove(mapping, &vm->va);
   1780		amdgpu_vm_free_mapping(adev, vm, mapping,
   1781				       bo_va->last_pt_update);
   1782	}
   1783
   1784	dma_fence_put(bo_va->last_pt_update);
   1785
   1786	if (bo && bo_va->is_xgmi)
   1787		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
   1788
   1789	kfree(bo_va);
   1790}
   1791
   1792/**
   1793 * amdgpu_vm_evictable - check if we can evict a VM
   1794 *
   1795 * @bo: A page table of the VM.
   1796 *
   1797 * Check if it is possible to evict a VM.
   1798 */
   1799bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
   1800{
   1801	struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
   1802
   1803	/* Page tables of a destroyed VM can go away immediately */
   1804	if (!bo_base || !bo_base->vm)
   1805		return true;
   1806
   1807	/* Don't evict VM page tables while they are busy */
   1808	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
   1809		return false;
   1810
   1811	/* Try to block ongoing updates */
   1812	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
   1813		return false;
   1814
   1815	/* Don't evict VM page tables while they are updated */
   1816	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
   1817		amdgpu_vm_eviction_unlock(bo_base->vm);
   1818		return false;
   1819	}
   1820
   1821	bo_base->vm->evicting = true;
   1822	amdgpu_vm_eviction_unlock(bo_base->vm);
   1823	return true;
   1824}
   1825
   1826/**
   1827 * amdgpu_vm_bo_invalidate - mark the bo as invalid
   1828 *
   1829 * @adev: amdgpu_device pointer
   1830 * @bo: amdgpu buffer object
   1831 * @evicted: is the BO evicted
   1832 *
   1833 * Mark @bo as invalid.
   1834 */
   1835void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
   1836			     struct amdgpu_bo *bo, bool evicted)
   1837{
   1838	struct amdgpu_vm_bo_base *bo_base;
   1839
   1840	/* shadow bo doesn't have bo base, its validation needs its parent */
   1841	if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
   1842		bo = bo->parent;
   1843
   1844	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
   1845		struct amdgpu_vm *vm = bo_base->vm;
   1846
   1847		if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
   1848			amdgpu_vm_bo_evicted(bo_base);
   1849			continue;
   1850		}
   1851
   1852		if (bo_base->moved)
   1853			continue;
   1854		bo_base->moved = true;
   1855
   1856		if (bo->tbo.type == ttm_bo_type_kernel)
   1857			amdgpu_vm_bo_relocated(bo_base);
   1858		else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
   1859			amdgpu_vm_bo_moved(bo_base);
   1860		else
   1861			amdgpu_vm_bo_invalidated(bo_base);
   1862	}
   1863}
   1864
   1865/**
   1866 * amdgpu_vm_get_block_size - calculate VM page table size as power of two
   1867 *
   1868 * @vm_size: VM size
   1869 *
   1870 * Returns:
    1871 * VM page table size as power of two
   1872 */
   1873static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
   1874{
   1875	/* Total bits covered by PD + PTs */
   1876	unsigned bits = ilog2(vm_size) + 18;
   1877
    1878	/* Make sure the PD is 4K in size up to 8GB address space.
    1879	 * Above that, split equally between PD and PTs. */
   1880	if (vm_size <= 8)
   1881		return (bits - 9);
   1882	else
   1883		return ((bits + 3) / 2);
   1884}
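
/*
 * Editor's worked example (hypothetical numbers, just to illustrate the
 * arithmetic above): for vm_size = 256 GB, bits = ilog2(256) + 18 = 26;
 * since vm_size > 8 GB the result is (26 + 3) / 2 = 14, i.e. a 14-bit
 * block size.  For vm_size = 8 GB, bits = 3 + 18 = 21 and the result is
 * 21 - 9 = 12, which keeps the PD at 4K.
 */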
   1885
   1886/**
   1887 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
   1888 *
   1889 * @adev: amdgpu_device pointer
   1890 * @min_vm_size: the minimum vm size in GB if it's set auto
   1891 * @fragment_size_default: Default PTE fragment size
   1892 * @max_level: max VMPT level
   1893 * @max_bits: max address space size in bits
   1894 *
   1895 */
   1896void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
   1897			   uint32_t fragment_size_default, unsigned max_level,
   1898			   unsigned max_bits)
   1899{
   1900	unsigned int max_size = 1 << (max_bits - 30);
   1901	unsigned int vm_size;
   1902	uint64_t tmp;
   1903
   1904	/* adjust vm size first */
   1905	if (amdgpu_vm_size != -1) {
   1906		vm_size = amdgpu_vm_size;
   1907		if (vm_size > max_size) {
   1908			dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
   1909				 amdgpu_vm_size, max_size);
   1910			vm_size = max_size;
   1911		}
   1912	} else {
   1913		struct sysinfo si;
   1914		unsigned int phys_ram_gb;
   1915
   1916		/* Optimal VM size depends on the amount of physical
   1917		 * RAM available. Underlying requirements and
   1918		 * assumptions:
   1919		 *
   1920		 *  - Need to map system memory and VRAM from all GPUs
   1921		 *     - VRAM from other GPUs not known here
   1922		 *     - Assume VRAM <= system memory
   1923		 *  - On GFX8 and older, VM space can be segmented for
   1924		 *    different MTYPEs
   1925		 *  - Need to allow room for fragmentation, guard pages etc.
   1926		 *
   1927		 * This adds up to a rough guess of system memory x3.
   1928		 * Round up to power of two to maximize the available
   1929		 * VM size with the given page table size.
   1930		 */
   1931		si_meminfo(&si);
   1932		phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
   1933			       (1 << 30) - 1) >> 30;
   1934		vm_size = roundup_pow_of_two(
   1935			min(max(phys_ram_gb * 3, min_vm_size), max_size));
   1936	}
   1937
   1938	adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
   1939
   1940	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
   1941	if (amdgpu_vm_block_size != -1)
   1942		tmp >>= amdgpu_vm_block_size - 9;
   1943	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
   1944	adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
   1945	switch (adev->vm_manager.num_level) {
   1946	case 3:
   1947		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
   1948		break;
   1949	case 2:
   1950		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
   1951		break;
   1952	case 1:
   1953		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
   1954		break;
   1955	default:
   1956		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
   1957	}
    1958	/* block size depends on vm size and hw setup */
   1959	if (amdgpu_vm_block_size != -1)
   1960		adev->vm_manager.block_size =
   1961			min((unsigned)amdgpu_vm_block_size, max_bits
   1962			    - AMDGPU_GPU_PAGE_SHIFT
   1963			    - 9 * adev->vm_manager.num_level);
   1964	else if (adev->vm_manager.num_level > 1)
   1965		adev->vm_manager.block_size = 9;
   1966	else
   1967		adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
   1968
   1969	if (amdgpu_vm_fragment_size == -1)
   1970		adev->vm_manager.fragment_size = fragment_size_default;
   1971	else
   1972		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
   1973
   1974	DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
   1975		 vm_size, adev->vm_manager.num_level + 1,
   1976		 adev->vm_manager.block_size,
   1977		 adev->vm_manager.fragment_size);
   1978}
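
/*
 * Editor's worked example (hypothetical inputs, purely to illustrate the
 * defaults above): with 16 GB of system RAM, min_vm_size = 256,
 * max_bits = 48, max_level = 2 and all related module parameters left at
 * -1: phys_ram_gb * 3 = 48, so vm_size = roundup_pow_of_two(max(48, 256))
 * = 256 GB and max_pfn = 256 << 18 = 2^26 GPU pages.  fls64(2^26) - 1 = 26,
 * DIV_ROUND_UP(26, 9) - 1 = 2, so num_level = 2 (PDB1 as root level), the
 * block size defaults to 9 bits and the fragment size to
 * fragment_size_default.
 */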
   1979
   1980/**
   1981 * amdgpu_vm_wait_idle - wait for the VM to become idle
   1982 *
   1983 * @vm: VM object to wait for
   1984 * @timeout: timeout to wait for VM to become idle
   1985 */
   1986long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
   1987{
   1988	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
   1989					DMA_RESV_USAGE_BOOKKEEP,
   1990					true, timeout);
   1991	if (timeout <= 0)
   1992		return timeout;
   1993
   1994	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
   1995}
   1996
   1997/**
   1998 * amdgpu_vm_init - initialize a vm instance
   1999 *
   2000 * @adev: amdgpu_device pointer
   2001 * @vm: requested vm
   2002 *
   2003 * Init @vm fields.
   2004 *
   2005 * Returns:
   2006 * 0 for success, error for failure.
   2007 */
   2008int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
   2009{
   2010	struct amdgpu_bo *root_bo;
   2011	struct amdgpu_bo_vm *root;
   2012	int r, i;
   2013
   2014	vm->va = RB_ROOT_CACHED;
   2015	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
   2016		vm->reserved_vmid[i] = NULL;
   2017	INIT_LIST_HEAD(&vm->evicted);
   2018	INIT_LIST_HEAD(&vm->relocated);
   2019	INIT_LIST_HEAD(&vm->moved);
   2020	INIT_LIST_HEAD(&vm->idle);
   2021	INIT_LIST_HEAD(&vm->invalidated);
   2022	spin_lock_init(&vm->invalidated_lock);
   2023	INIT_LIST_HEAD(&vm->freed);
   2024	INIT_LIST_HEAD(&vm->done);
   2025
   2026	/* create scheduler entities for page table updates */
   2027	r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
   2028				  adev->vm_manager.vm_pte_scheds,
   2029				  adev->vm_manager.vm_pte_num_scheds, NULL);
   2030	if (r)
   2031		return r;
   2032
   2033	r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
   2034				  adev->vm_manager.vm_pte_scheds,
   2035				  adev->vm_manager.vm_pte_num_scheds, NULL);
   2036	if (r)
   2037		goto error_free_immediate;
   2038
   2039	vm->pte_support_ats = false;
   2040	vm->is_compute_context = false;
   2041
   2042	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
   2043				    AMDGPU_VM_USE_CPU_FOR_GFX);
   2044
   2045	DRM_DEBUG_DRIVER("VM update mode is %s\n",
   2046			 vm->use_cpu_for_update ? "CPU" : "SDMA");
   2047	WARN_ONCE((vm->use_cpu_for_update &&
   2048		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
   2049		  "CPU update of VM recommended only for large BAR system\n");
   2050
   2051	if (vm->use_cpu_for_update)
   2052		vm->update_funcs = &amdgpu_vm_cpu_funcs;
   2053	else
   2054		vm->update_funcs = &amdgpu_vm_sdma_funcs;
   2055	vm->last_update = NULL;
   2056	vm->last_unlocked = dma_fence_get_stub();
   2057	vm->last_tlb_flush = dma_fence_get_stub();
   2058
   2059	mutex_init(&vm->eviction_lock);
   2060	vm->evicting = false;
   2061
   2062	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
   2063				false, &root);
   2064	if (r)
   2065		goto error_free_delayed;
   2066	root_bo = &root->bo;
   2067	r = amdgpu_bo_reserve(root_bo, true);
   2068	if (r)
   2069		goto error_free_root;
   2070
   2071	r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
   2072	if (r)
   2073		goto error_unreserve;
   2074
   2075	amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
   2076
   2077	r = amdgpu_vm_pt_clear(adev, vm, root, false);
   2078	if (r)
   2079		goto error_unreserve;
   2080
   2081	amdgpu_bo_unreserve(vm->root.bo);
   2082
   2083	INIT_KFIFO(vm->faults);
   2084
   2085	return 0;
   2086
   2087error_unreserve:
   2088	amdgpu_bo_unreserve(vm->root.bo);
   2089
   2090error_free_root:
   2091	amdgpu_bo_unref(&root->shadow);
   2092	amdgpu_bo_unref(&root_bo);
   2093	vm->root.bo = NULL;
   2094
   2095error_free_delayed:
   2096	dma_fence_put(vm->last_tlb_flush);
   2097	dma_fence_put(vm->last_unlocked);
   2098	drm_sched_entity_destroy(&vm->delayed);
   2099
   2100error_free_immediate:
   2101	drm_sched_entity_destroy(&vm->immediate);
   2102
   2103	return r;
   2104}
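
/*
 * Editor's sketch (illustrative, not taken from this file): the usual
 * lifetime pairing as seen from a driver file-private context.  Variable
 * names are made up; error handling is elided.
 *
 *	r = amdgpu_vm_init(adev, &fpriv->vm);
 *	if (r)
 *		return r;
 *	r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
 *	...
 *	amdgpu_vm_fini(adev, &fpriv->vm);
 */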
   2105
   2106/**
   2107 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
   2108 *
   2109 * @adev: amdgpu_device pointer
   2110 * @vm: requested vm
   2111 *
   2112 * This only works on GFX VMs that don't have any BOs added and no
   2113 * page tables allocated yet.
   2114 *
   2115 * Changes the following VM parameters:
   2116 * - use_cpu_for_update
    2117 * - pte_support_ats
   2118 *
   2119 * Reinitializes the page directory to reflect the changed ATS
   2120 * setting.
   2121 *
   2122 * Returns:
   2123 * 0 for success, -errno for errors.
   2124 */
   2125int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
   2126{
   2127	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
   2128	int r;
   2129
   2130	r = amdgpu_bo_reserve(vm->root.bo, true);
   2131	if (r)
   2132		return r;
   2133
   2134	/* Sanity checks */
   2135	if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
   2136		r = -EINVAL;
   2137		goto unreserve_bo;
   2138	}
   2139
   2140	/* Check if PD needs to be reinitialized and do it before
   2141	 * changing any other state, in case it fails.
   2142	 */
   2143	if (pte_support_ats != vm->pte_support_ats) {
   2144		vm->pte_support_ats = pte_support_ats;
   2145		r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
   2146				       false);
   2147		if (r)
   2148			goto unreserve_bo;
   2149	}
   2150
   2151	/* Update VM state */
   2152	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
   2153				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
   2154	DRM_DEBUG_DRIVER("VM update mode is %s\n",
   2155			 vm->use_cpu_for_update ? "CPU" : "SDMA");
   2156	WARN_ONCE((vm->use_cpu_for_update &&
   2157		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
   2158		  "CPU update of VM recommended only for large BAR system\n");
   2159
   2160	if (vm->use_cpu_for_update) {
   2161		/* Sync with last SDMA update/clear before switching to CPU */
   2162		r = amdgpu_bo_sync_wait(vm->root.bo,
   2163					AMDGPU_FENCE_OWNER_UNDEFINED, true);
   2164		if (r)
   2165			goto unreserve_bo;
   2166
   2167		vm->update_funcs = &amdgpu_vm_cpu_funcs;
   2168	} else {
   2169		vm->update_funcs = &amdgpu_vm_sdma_funcs;
   2170	}
   2171	dma_fence_put(vm->last_update);
   2172	vm->last_update = NULL;
   2173	vm->is_compute_context = true;
   2174
   2175	/* Free the shadow bo for compute VM */
   2176	amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
   2177
   2178	goto unreserve_bo;
   2179
   2180unreserve_bo:
   2181	amdgpu_bo_unreserve(vm->root.bo);
   2182	return r;
   2183}
   2184
   2185/**
   2186 * amdgpu_vm_release_compute - release a compute vm
   2187 * @adev: amdgpu_device pointer
   2188 * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
   2189 *
    2190 * This is the counterpart of amdgpu_vm_make_compute. It decouples the compute
    2191 * pasid from the vm. Compute should stop using the vm after this call.
   2192 */
   2193void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
   2194{
   2195	amdgpu_vm_set_pasid(adev, vm, 0);
   2196	vm->is_compute_context = false;
   2197}
   2198
   2199/**
   2200 * amdgpu_vm_fini - tear down a vm instance
   2201 *
   2202 * @adev: amdgpu_device pointer
   2203 * @vm: requested vm
   2204 *
   2205 * Tear down @vm.
    2206 * Unbind the VM and remove all BOs from the VM's BO list.
   2207 */
   2208void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
   2209{
   2210	struct amdgpu_bo_va_mapping *mapping, *tmp;
   2211	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
   2212	struct amdgpu_bo *root;
   2213	unsigned long flags;
   2214	int i;
   2215
   2216	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
   2217
   2218	root = amdgpu_bo_ref(vm->root.bo);
   2219	amdgpu_bo_reserve(root, true);
   2220	amdgpu_vm_set_pasid(adev, vm, 0);
   2221	dma_fence_wait(vm->last_unlocked, false);
   2222	dma_fence_put(vm->last_unlocked);
   2223	dma_fence_wait(vm->last_tlb_flush, false);
   2224	/* Make sure that all fence callbacks have completed */
   2225	spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
   2226	spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
   2227	dma_fence_put(vm->last_tlb_flush);
   2228
   2229	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
   2230		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
   2231			amdgpu_vm_prt_fini(adev, vm);
   2232			prt_fini_needed = false;
   2233		}
   2234
   2235		list_del(&mapping->list);
   2236		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
   2237	}
   2238
   2239	amdgpu_vm_pt_free_root(adev, vm);
   2240	amdgpu_bo_unreserve(root);
   2241	amdgpu_bo_unref(&root);
   2242	WARN_ON(vm->root.bo);
   2243
   2244	drm_sched_entity_destroy(&vm->immediate);
   2245	drm_sched_entity_destroy(&vm->delayed);
   2246
   2247	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
   2248		dev_err(adev->dev, "still active bo inside vm\n");
   2249	}
   2250	rbtree_postorder_for_each_entry_safe(mapping, tmp,
   2251					     &vm->va.rb_root, rb) {
   2252		/* Don't remove the mapping here, we don't want to trigger a
   2253		 * rebalance and the tree is about to be destroyed anyway.
   2254		 */
   2255		list_del(&mapping->list);
   2256		kfree(mapping);
   2257	}
   2258
   2259	dma_fence_put(vm->last_update);
   2260	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
   2261		amdgpu_vmid_free_reserved(adev, vm, i);
   2262}
   2263
   2264/**
   2265 * amdgpu_vm_manager_init - init the VM manager
   2266 *
   2267 * @adev: amdgpu_device pointer
   2268 *
   2269 * Initialize the VM manager structures
   2270 */
   2271void amdgpu_vm_manager_init(struct amdgpu_device *adev)
   2272{
   2273	unsigned i;
   2274
   2275	/* Concurrent flushes are only possible starting with Vega10 and
   2276	 * are broken on Navi10 and Navi14.
   2277	 */
   2278	adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
   2279					      adev->asic_type == CHIP_NAVI10 ||
   2280					      adev->asic_type == CHIP_NAVI14);
   2281	amdgpu_vmid_mgr_init(adev);
   2282
   2283	adev->vm_manager.fence_context =
   2284		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
   2285	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
   2286		adev->vm_manager.seqno[i] = 0;
   2287
   2288	spin_lock_init(&adev->vm_manager.prt_lock);
   2289	atomic_set(&adev->vm_manager.num_prt_users, 0);
   2290
    2291	/* If not overridden by the user, compute VM page tables are by
    2292	 * default only updated by the CPU on large BAR systems.
    2293	 */
   2294#ifdef CONFIG_X86_64
   2295	if (amdgpu_vm_update_mode == -1) {
   2296		if (amdgpu_gmc_vram_full_visible(&adev->gmc))
   2297			adev->vm_manager.vm_update_mode =
   2298				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
   2299		else
   2300			adev->vm_manager.vm_update_mode = 0;
   2301	} else
   2302		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
   2303#else
   2304	adev->vm_manager.vm_update_mode = 0;
   2305#endif
   2306
   2307	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
   2308}
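
/*
 * Editor's note (assumption based on the amdgpu module parameters, not on
 * this file): the selection above can be overridden at load time via the
 * vm_update_mode parameter, e.g. "modprobe amdgpu vm_update_mode=3" or
 * "amdgpu.vm_update_mode=3" on the kernel command line to force CPU page
 * table updates for both GFX and compute VMs.
 */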
   2309
   2310/**
   2311 * amdgpu_vm_manager_fini - cleanup VM manager
   2312 *
   2313 * @adev: amdgpu_device pointer
   2314 *
   2315 * Cleanup the VM manager and free resources.
   2316 */
   2317void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
   2318{
   2319	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
   2320	xa_destroy(&adev->vm_manager.pasids);
   2321
   2322	amdgpu_vmid_mgr_fini(adev);
   2323}
   2324
   2325/**
   2326 * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
   2327 *
   2328 * @dev: drm device pointer
   2329 * @data: drm_amdgpu_vm
   2330 * @filp: drm file pointer
   2331 *
   2332 * Returns:
   2333 * 0 for success, -errno for errors.
   2334 */
   2335int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
   2336{
   2337	union drm_amdgpu_vm *args = data;
   2338	struct amdgpu_device *adev = drm_to_adev(dev);
   2339	struct amdgpu_fpriv *fpriv = filp->driver_priv;
   2340	long timeout = msecs_to_jiffies(2000);
   2341	int r;
   2342
   2343	switch (args->in.op) {
   2344	case AMDGPU_VM_OP_RESERVE_VMID:
    2345		/* We only need to reserve the VMID from the gfxhub */
   2346		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
   2347					       AMDGPU_GFXHUB_0);
   2348		if (r)
   2349			return r;
   2350		break;
   2351	case AMDGPU_VM_OP_UNRESERVE_VMID:
   2352		if (amdgpu_sriov_runtime(adev))
   2353			timeout = 8 * timeout;
   2354
    2355		/* Wait for the VM to become idle to make sure the VMID set in
    2356		 * SPM_VMID is not referenced anymore.
   2357		 */
   2358		r = amdgpu_bo_reserve(fpriv->vm.root.bo, true);
   2359		if (r)
   2360			return r;
   2361
   2362		r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
   2363		if (r < 0)
   2364			return r;
   2365
   2366		amdgpu_bo_unreserve(fpriv->vm.root.bo);
   2367		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
   2368		break;
   2369	default:
   2370		return -EINVAL;
   2371	}
   2372
   2373	return 0;
   2374}
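
/*
 * Editor's sketch of the userspace side (assumed libdrm usage, not part of
 * this file): reserving and later releasing a VMID through the ioctl above,
 * where "fd" is an open render node for the device.
 *
 *	union drm_amdgpu_vm args = { .in.op = AMDGPU_VM_OP_RESERVE_VMID };
 *
 *	r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
 *	...
 *	args.in.op = AMDGPU_VM_OP_UNRESERVE_VMID;
 *	r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
 */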
   2375
   2376/**
   2377 * amdgpu_vm_get_task_info - Extracts task info for a PASID.
   2378 *
    2379 * @adev: amdgpu_device pointer
   2380 * @pasid: PASID identifier for VM
   2381 * @task_info: task_info to fill.
   2382 */
   2383void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
   2384			 struct amdgpu_task_info *task_info)
   2385{
   2386	struct amdgpu_vm *vm;
   2387	unsigned long flags;
   2388
   2389	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
   2390
   2391	vm = xa_load(&adev->vm_manager.pasids, pasid);
   2392	if (vm)
   2393		*task_info = vm->task_info;
   2394
   2395	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
   2396}
   2397
   2398/**
   2399 * amdgpu_vm_set_task_info - Sets VMs task info.
   2400 *
   2401 * @vm: vm for which to set the info
   2402 */
   2403void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
   2404{
   2405	if (vm->task_info.pid)
   2406		return;
   2407
   2408	vm->task_info.pid = current->pid;
   2409	get_task_comm(vm->task_info.task_name, current);
   2410
   2411	if (current->group_leader->mm != current->mm)
   2412		return;
   2413
   2414	vm->task_info.tgid = current->group_leader->pid;
   2415	get_task_comm(vm->task_info.process_name, current->group_leader);
   2416}
   2417
   2418/**
   2419 * amdgpu_vm_handle_fault - graceful handling of VM faults.
   2420 * @adev: amdgpu device pointer
   2421 * @pasid: PASID of the VM
   2422 * @addr: Address of the fault
    2423 * @write_fault: true if write fault, false if read fault
   2424 *
   2425 * Try to gracefully handle a VM fault. Return true if the fault was handled and
   2426 * shouldn't be reported any more.
   2427 */
   2428bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
   2429			    uint64_t addr, bool write_fault)
   2430{
   2431	bool is_compute_context = false;
   2432	struct amdgpu_bo *root;
   2433	unsigned long irqflags;
   2434	uint64_t value, flags;
   2435	struct amdgpu_vm *vm;
   2436	int r;
   2437
   2438	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
   2439	vm = xa_load(&adev->vm_manager.pasids, pasid);
   2440	if (vm) {
   2441		root = amdgpu_bo_ref(vm->root.bo);
   2442		is_compute_context = vm->is_compute_context;
   2443	} else {
   2444		root = NULL;
   2445	}
   2446	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
   2447
   2448	if (!root)
   2449		return false;
   2450
   2451	addr /= AMDGPU_GPU_PAGE_SIZE;
   2452
   2453	if (is_compute_context &&
   2454	    !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
   2455		amdgpu_bo_unref(&root);
   2456		return true;
   2457	}
   2458
   2459	r = amdgpu_bo_reserve(root, true);
   2460	if (r)
   2461		goto error_unref;
   2462
   2463	/* Double check that the VM still exists */
   2464	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
   2465	vm = xa_load(&adev->vm_manager.pasids, pasid);
   2466	if (vm && vm->root.bo != root)
   2467		vm = NULL;
   2468	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
   2469	if (!vm)
   2470		goto error_unlock;
   2471
   2472	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
   2473		AMDGPU_PTE_SYSTEM;
   2474
   2475	if (is_compute_context) {
   2476		/* Intentionally setting invalid PTE flag
   2477		 * combination to force a no-retry-fault
   2478		 */
   2479		flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
   2480			AMDGPU_PTE_TF;
   2481		value = 0;
   2482	} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
   2483		/* Redirect the access to the dummy page */
   2484		value = adev->dummy_page_addr;
   2485		flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
   2486			AMDGPU_PTE_WRITEABLE;
   2487
   2488	} else {
   2489		/* Let the hw retry silently on the PTE */
   2490		value = 0;
   2491	}
   2492
   2493	r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
   2494	if (r) {
   2495		pr_debug("failed %d to reserve fence slot\n", r);
   2496		goto error_unlock;
   2497	}
   2498
   2499	r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
   2500				   addr, flags, value, 0, NULL, NULL, NULL);
   2501	if (r)
   2502		goto error_unlock;
   2503
   2504	r = amdgpu_vm_update_pdes(adev, vm, true);
   2505
   2506error_unlock:
   2507	amdgpu_bo_unreserve(root);
   2508	if (r < 0)
   2509		DRM_ERROR("Can't handle page fault (%d)\n", r);
   2510
   2511error_unref:
   2512	amdgpu_bo_unref(&root);
   2513
   2514	return false;
   2515}
   2516
   2517#if defined(CONFIG_DEBUG_FS)
   2518/**
   2519 * amdgpu_debugfs_vm_bo_info  - print BO info for the VM
   2520 *
   2521 * @vm: Requested VM for printing BO info
   2522 * @m: debugfs file
   2523 *
   2524 * Print BO information in debugfs file for the VM
   2525 */
   2526void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
   2527{
   2528	struct amdgpu_bo_va *bo_va, *tmp;
   2529	u64 total_idle = 0;
   2530	u64 total_evicted = 0;
   2531	u64 total_relocated = 0;
   2532	u64 total_moved = 0;
   2533	u64 total_invalidated = 0;
   2534	u64 total_done = 0;
   2535	unsigned int total_idle_objs = 0;
   2536	unsigned int total_evicted_objs = 0;
   2537	unsigned int total_relocated_objs = 0;
   2538	unsigned int total_moved_objs = 0;
   2539	unsigned int total_invalidated_objs = 0;
   2540	unsigned int total_done_objs = 0;
   2541	unsigned int id = 0;
   2542
   2543	seq_puts(m, "\tIdle BOs:\n");
   2544	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
   2545		if (!bo_va->base.bo)
   2546			continue;
   2547		total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
   2548	}
   2549	total_idle_objs = id;
   2550	id = 0;
   2551
   2552	seq_puts(m, "\tEvicted BOs:\n");
   2553	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
   2554		if (!bo_va->base.bo)
   2555			continue;
   2556		total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
   2557	}
   2558	total_evicted_objs = id;
   2559	id = 0;
   2560
   2561	seq_puts(m, "\tRelocated BOs:\n");
   2562	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
   2563		if (!bo_va->base.bo)
   2564			continue;
   2565		total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
   2566	}
   2567	total_relocated_objs = id;
   2568	id = 0;
   2569
   2570	seq_puts(m, "\tMoved BOs:\n");
   2571	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
   2572		if (!bo_va->base.bo)
   2573			continue;
   2574		total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
   2575	}
   2576	total_moved_objs = id;
   2577	id = 0;
   2578
   2579	seq_puts(m, "\tInvalidated BOs:\n");
   2580	spin_lock(&vm->invalidated_lock);
   2581	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
   2582		if (!bo_va->base.bo)
   2583			continue;
   2584		total_invalidated += amdgpu_bo_print_info(id++,	bo_va->base.bo, m);
   2585	}
   2586	total_invalidated_objs = id;
   2587	id = 0;
   2588
   2589	seq_puts(m, "\tDone BOs:\n");
   2590	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
   2591		if (!bo_va->base.bo)
   2592			continue;
   2593		total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
   2594	}
   2595	spin_unlock(&vm->invalidated_lock);
   2596	total_done_objs = id;
   2597
   2598	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
   2599		   total_idle_objs);
   2600	seq_printf(m, "\tTotal evicted size:     %12lld\tobjs:\t%d\n", total_evicted,
   2601		   total_evicted_objs);
   2602	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
   2603		   total_relocated_objs);
   2604	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
   2605		   total_moved_objs);
   2606	seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated,
   2607		   total_invalidated_objs);
   2608	seq_printf(m, "\tTotal done size:        %12lld\tobjs:\t%d\n", total_done,
   2609		   total_done_objs);
   2610}
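
/*
 * Editor's note (assumption about the debugfs wiring, which lives outside
 * this file): the summary printed above is typically reachable through a
 * per-device debugfs file such as
 * /sys/kernel/debug/dri/<minor>/amdgpu_vm_info.
 */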
   2611#endif