cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_amdkfd.c (19574B)


/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_pcie.h"
#include "amd_shared.h"

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

static bool kfd_initialized;

int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	amdgpu_amdkfd_gpuvm_init_mem_limits();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe(adev, vf);

	if (adev->kfd.dev)
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                setup amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->enable_mes) {
		/*
		 * With MES enabled, we only need to initialize
		 * the base address. The size and offset are
		 * not initialized as AMDGPU manages the whole
		 * doorbell space.
		 */
		*aperture_base = adev->doorbell.base;
		*aperture_size = 0;
		*start_offset = 0;
	} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
						sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
			.enable_mes = adev->enable_mes,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc..
		 * In order to route a doorbell to CP engine, the lower
		 * 12 bits of its address has to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
						adev_to_drm(adev), &gpu_resources);
	}
}

void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
	}
}

void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_iommu(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}

int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&(bo));
}

int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
				void **mem_obj)
{
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_user *ubo;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create_user(adev, &bp, &ubo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	bo = &ubo->bo;
	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
				      enum kgd_engine_type type)
{
	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}

	return 0;
}

void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
				      struct kfd_local_mem_info *mem_info)
{
	memset(mem_info, 0, sizeof(*mem_info));

	mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
	mem_info->local_mem_size_private = adev->gmc.real_vram_size -
						adev->gmc.visible_vram_size;

	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
	/* the sclk is in quantas of 10kHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;
	else if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
{
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
				  struct amdgpu_device **dmabuf_adev,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dmabuf_adev)
		*dmabuf_adev = adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}

uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
					  struct amdgpu_device *src)
{
	struct amdgpu_device *peer_adev = src;
	struct amdgpu_device *adev = dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return (uint8_t)ret;
}

int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
					    struct amdgpu_device *src,
					    bool is_min)
{
	struct amdgpu_device *adev = dst, *peer_adev;
	int num_links;

	if (adev->asic_type != CHIP_ALDEBARAN)
		return 0;

	if (src)
		peer_adev = src;

	/* num links returns 0 for indirect peers since indirect route is unknown. */
	num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
	if (num_links < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, num_links);
		num_links = 0;
	}

	/* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
	return (num_links * 16 * 25000)/BITS_PER_BYTE;
}

int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
	int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
							fls(adev->pm.pcie_mlw_mask)) - 1;
	int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
					fls(adev->pm.pcie_gen_mask &
						CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
	uint32_t num_lanes_mask = 1 << num_lanes_shift;
	uint32_t gen_speed_mask = 1 << gen_speed_shift;
	int num_lanes_factor = 0, gen_speed_mbits_factor = 0;

	switch (num_lanes_mask) {
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
		num_lanes_factor = 1;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
		num_lanes_factor = 2;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
		num_lanes_factor = 4;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
		num_lanes_factor = 8;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
		num_lanes_factor = 12;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
		num_lanes_factor = 16;
		break;
	case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
		num_lanes_factor = 32;
		break;
	}

	switch (gen_speed_mask) {
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
		gen_speed_mbits_factor = 2500;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
		gen_speed_mbits_factor = 5000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
		gen_speed_mbits_factor = 8000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
		gen_speed_mbits_factor = 16000;
		break;
	case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
		gen_speed_mbits_factor = 32000;
		break;
	}

	return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
}

int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
				enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	amdgpu_job_free(job);
err:
	return ret;
}

void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
				     uint16_t vmid)
{
	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		for (i = 0; i < adev->num_vmhubs; i++)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	}

	return 0;
}

int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
				      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
{
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI ||
	    adev->family == AMDGPU_FAMILY_RV)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}

bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
	return adev->have_atomics_support;
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
{
	struct ras_err_data err_data = {0, 0, 0, NULL};

	/* CPU MCA will handle page retirement if connected_to_cpu is 1 */
	if (!adev->gmc.xgmi.connected_to_cpu)
		amdgpu_umc_poison_handler(adev, &err_data, reset);
	else if (reset)
		amdgpu_amdkfd_gpu_reset(adev);
}

bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
{
	if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
		return adev->gfx.ras->query_utcl2_poison_status(adev);
	else
		return false;
}