cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_amdkfd_gfx_v8.c (17146B)


/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "oss/oss_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
#include "gmc/gmc_8_1_d.h"
#include "vi_structs.h"
#include "vid.h"

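/* Dequeue request types written to mmCP_HQD_DEQUEUE_REQUEST by
 * kgd_hqd_destroy().
 */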
enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES
};

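/* lock_srbm() selects the SRBM register bank for the given MEC, pipe,
 * queue and VMID via mmSRBM_GFX_CNTL while holding srbm_mutex; it must
 * always be paired with unlock_srbm(), which restores the default bank
 * and releases the mutex.
 */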
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);

	mutex_lock(&adev->srbm_mutex);
	WREG32(mmSRBM_GFX_CNTL, value);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	WREG32(mmSRBM_GFX_CNTL, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
	WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
	WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
	WREG32(mmSH_MEM_BASES, sh_mem_bases);

	unlock_srbm(adev);
}

static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
					unsigned int vmid)
{
	/*
	 * We have to assume that there is no outstanding mapping.
	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
	 * a mapping is in progress or because a mapping finished
	 * and the SW cleared it.
	 * So the protocol is to always wait & clear.
	 */
	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
			ATC_VMID0_PASID_MAPPING__VALID_MASK;

	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);

	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
		cpu_relax();
	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);

	/* Mapping vmid to pasid also for IH block */
	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);

	return 0;
}

static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
{
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
			m->sdma_engine_id, m->sdma_queue_id, retval);

	return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
{
	return (struct vi_mqd *)mqd;
}

static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct vi_sdma_mqd *)mqd;
}

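/* Program the HQD registers of the selected pipe/queue from the MQD image
 * and activate the queue. The user-space write pointer is read with the
 * SRBM lock dropped (see the comment near read_user_wptr() below).
 */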
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t __user *wptr, uint32_t wptr_shift,
			uint32_t wptr_mask, struct mm_struct *mm)
{
	struct vi_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, wptr_val, data;
	bool valid_wptr = false;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0*/
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32(mmRLC_CP_SCHEDULERS);
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(mmRLC_CP_SCHEDULERS, value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
	}

	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);

	/* Copy userspace write pointer value to register.
	 * Activate doorbell logic to monitor subsequent changes.
	 */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* read_user_ptr may take the mm->mmap_lock.
	 * release srbm_mutex to avoid circular dependency between
	 * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
	 */
	release_queue(adev);
	valid_wptr = read_user_wptr(mm, wptr, wptr_val);
	acquire_queue(adev, pipe_id, queue_id);
	if (valid_wptr)
		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(mmCP_HQD_ACTIVE, data);

	release_queue(adev);

	return 0;
}

static int kgd_hqd_dump(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

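/* SDMA counterpart of kgd_hqd_load(): disable the ring buffer, wait for
 * the RLC context to go idle, restore the ring state from the MQD and
 * re-enable it with the doorbell armed.
 */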
static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct vi_sdma_mqd *m;
	unsigned long end_jiffies;
	uint32_t sdma_rlc_reg_offset;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
				m->sdmax_rlcx_virtual_addr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32(mmCP_HQD_ACTIVE);
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(mmCP_HQD_PQ_BASE) &&
				high == RREG32(mmCP_HQD_PQ_BASE_HI))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
{
	struct vi_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

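/* Preempt or reset the queue currently loaded on the given pipe/queue and
 * wait for the HQD to become inactive. The IQ-timer and dequeue-request
 * polling below avoids issuing the dequeue request in an unsafe window.
 */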
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;
	struct vi_mqd *m = get_mqd(mqd);

	if (amdgpu_in_reset(adev))
		return -EIO;

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct vi_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}

static int kgd_wave_control_execute(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
	WREG32(mmSQ_CMD, sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SH_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32(mmGRBM_GFX_INDEX, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static void set_scratch_backing_va(struct amdgpu_device *adev,
					uint64_t va, uint32_t vmid)
{
	lock_srbm(adev, 0, 0, 0, vmid);
	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
	unlock_srbm(adev);
}

static void set_vm_context_page_table_base(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID\n");
		return;
	}
	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
			lower_32_bits(page_table_base));
}

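/* Callback table through which amdkfd drives the GFX8 (VI) compute hardware. */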
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
	.program_sh_mem_settings = kgd_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.wave_control_execute = kgd_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
			get_atc_vmid_pasid_mapping_info,
	.set_scratch_backing_va = set_scratch_backing_va,
	.set_vm_context_page_table_base = set_vm_context_page_table_base,
};
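
For orientation, a minimal sketch of how a KFD-side caller might go through the callback table above. The helper function, its name, and the way the table pointer is obtained are assumptions for illustration only; the field names and signatures are the ones defined in this file.

/* Hypothetical usage sketch -- not part of amdgpu_amdkfd_gfx_v8.c. The
 * real plumbing between amdgpu and amdkfd differs between kernel versions.
 */
static int example_bind_process(struct amdgpu_device *adev,
				const struct kfd2kgd_calls *f,
				u32 pasid, unsigned int vmid,
				uint32_t pipe_id)
{
	int r;

	/* Bind the process address space ID to a hardware VMID. */
	r = f->set_pasid_vmid_mapping(adev, pasid, vmid);
	if (r)
		return r;

	/* Enable compute-pipe interrupts for the pipe that will host
	 * this process's queues.
	 */
	return f->init_interrupts(adev, pipe_id);
}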