cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_amdkfd_gfx_v11.c (19242B)


/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "oss/osssys_6_0_0_offset.h"
#include "oss/osssys_6_0_0_sh_mask.h"
#include "soc15_common.h"
#include "soc15d.h"
#include "v11_structs.h"
#include "soc21.h"

enum hqd_dequeue_request_type {
	NO_ACTION = 0,
	DRAIN_PIPE,
	RESET_WAVES,
	SAVE_WAVES
};

static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
			uint32_t queue, uint32_t vmid)
{
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, mec, pipe, queue, vmid);
}

static void unlock_srbm(struct amdgpu_device *adev)
{
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
				uint32_t queue_id)
{
	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, queue_id, 0);
}

static uint64_t get_queue_mask(struct amdgpu_device *adev,
			       uint32_t pipe_id, uint32_t queue_id)
{
	unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
			queue_id;

	return 1ull << bit;
}

static void release_queue(struct amdgpu_device *adev)
{
	unlock_srbm(adev);
}

static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
					uint32_t sh_mem_config,
					uint32_t sh_mem_ape1_base,
					uint32_t sh_mem_ape1_limit,
					uint32_t sh_mem_bases)
{
	lock_srbm(adev, 0, 0, 0, vmid);

	WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
	WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);

	unlock_srbm(adev);
}

static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
					unsigned int vmid)
{
	uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

	/* Mapping vmid to pasid also for IH block */
	pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
			vmid, pasid);
	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);

	return 0;
}

static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
{
	uint32_t mec;
	uint32_t pipe;

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	lock_srbm(adev, mec, pipe, 0, 0);

	WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

	unlock_srbm(adev);

	return 0;
}

static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
		break;
	default:
		BUG();
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

static inline struct v11_compute_mqd *get_mqd(void *mqd)
{
	return (struct v11_compute_mqd *)mqd;
}

static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v11_sdma_mqd *)mqd;
}

static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm)
{
	struct v11_compute_mqd *m;
	uint32_t *mqd_hqd;
	uint32_t reg, hqd_base, data;

	m = get_mqd(mqd);

	pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
	acquire_queue(adev, pipe_id, queue_id);

	/* HIQ is set during driver init period with vmid set to 0*/
	if (m->cp_hqd_vmid == 0) {
		uint32_t value, mec, pipe;

		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
			mec, pipe, queue_id);
		value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
			((mec << 5) | (pipe << 3) | queue_id | 0x80));
		WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
	}

	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
	mqd_hqd = &m->cp_mqd_base_addr_lo;
	hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
		WREG32(reg, mqd_hqd[reg - hqd_base]);


	/* Activate doorbell logic before triggering WPTR poll. */
	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);

	if (wptr) {
		/* Don't read wptr with get_user because the user
		 * context may not be accessible (if this function
		 * runs in a work queue). Instead trigger a one-shot
		 * polling read from memory in the CP. This assumes
		 * that wptr is GPU-accessible in the queue's VMID via
		 * ATC or SVM. WPTR==RPTR before starting the poll so
		 * the CP starts fetching new commands from the right
		 * place.
		 *
		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
		 * tricky. Assume that the queue didn't overflow. The
		 * number of valid bits in the 32-bit RPTR depends on
		 * the queue size. The remaining bits are taken from
		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
		 * queue size.
		 */
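		/* Illustrative walk-through of the bit arithmetic below,
		 * using hypothetical register values (not taken from the
		 * source): a QUEUE_SIZE field of 7 gives a ring of
		 * 2 << 7 = 0x100 dwords, so queue_size - 1 = 0xff. For
		 * cp_hqd_pq_rptr = 0x1234abcd the masked low bits are
		 * 0xcd; with cp_hqd_pq_wptr_lo = 0x00050023 the saved
		 * WPTR's low bits (0x23) are smaller, so 0x100 is added
		 * for the wrap, giving 0x1cd. Adding the remaining
		 * wptr_lo bits (0x00050000) and cp_hqd_pq_wptr_hi << 32
		 * completes the guessed 64-bit WPTR.
		 */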
		uint32_t queue_size =
			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);

		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
			guessed_wptr += queue_size;
		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
		       lower_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
		       upper_32_bits(guessed_wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
		       lower_32_bits((uint64_t)wptr));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
		       upper_32_bits((uint64_t)wptr));
		pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
			 (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
		WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
		       (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
	}

	/* Start the EOP fetcher */
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);

	release_queue(adev);

	return 0;
}

static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
			      uint32_t pipe_id, uint32_t queue_id,
			      uint32_t doorbell_off)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	struct v11_compute_mqd *m;
	uint32_t mec, pipe;
	int r;

	m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

	pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
		 mec, pipe, queue_id);

	spin_lock(&adev->gfx.kiq.ring_lock);
	r = amdgpu_ring_alloc(kiq_ring, 7);
	if (r) {
		pr_err("Failed to alloc KIQ (%d).\n", r);
		goto out_unlock;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(queue_id) |
			  PACKET3_MAP_QUEUES_PIPE(pipe) |
			  PACKET3_MAP_QUEUES_ME((mec - 1)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
	amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
	amdgpu_ring_commit(kiq_ring);

out_unlock:
	spin_unlock(&adev->gfx.kiq.ring_lock);
	release_queue(adev);

	return r;
}

static int hqd_dump_v11(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(adev, pipe_id, queue_id);

	for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
	     reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
		DUMP_REG(reg);

	release_queue(adev);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v11_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
		if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);

	return 0;
}

static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (7+11+1+12+12)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = regSDMA0_QUEUE0_RB_CNTL;
	     reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
	     reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
	     reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
	     reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
	     reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id)
{
	uint32_t act;
	bool retval = false;
	uint32_t low, high;

	acquire_queue(adev, pipe_id, queue_id);
	act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
	if (act) {
		low = lower_32_bits(queue_address >> 8);
		high = upper_32_bits(queue_address >> 8);

		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
		   high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
			retval = true;
	}
	release_queue(adev);
	return retval;
}

static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
{
	struct v11_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	enum hqd_dequeue_request_type type;
	unsigned long end_jiffies;
	uint32_t temp;
	struct v11_compute_mqd *m = get_mqd(mqd);

	acquire_queue(adev, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue pipe %d queue %d preemption failed\n",
					pipe_id, queue_id);
			release_queue(adev);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(adev);
	return 0;
}

static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v11_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
	temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
		if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
		SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);

	return 0;
}

static int wave_control_execute_v11(struct amdgpu_device *adev,
					uint32_t gfx_index_val,
					uint32_t sq_cmd)
{
	uint32_t data = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
	WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);

	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		INSTANCE_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SA_BROADCAST_WRITES, 1);
	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
		SE_BROADCAST_WRITES, 1);

	WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t page_table_base)
{
	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	/* SDMA is on gfxhub as well for gfx11 adapters */
	adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
}

const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
	.program_sh_mem_settings = program_sh_mem_settings_v11,
	.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
	.init_interrupts = init_interrupts_v11,
	.hqd_load = hqd_load_v11,
	.hiq_mqd_load = hiq_mqd_load_v11,
	.hqd_sdma_load = hqd_sdma_load_v11,
	.hqd_dump = hqd_dump_v11,
	.hqd_sdma_dump = hqd_sdma_dump_v11,
	.hqd_is_occupied = hqd_is_occupied_v11,
	.hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
	.hqd_destroy = hqd_destroy_v11,
	.hqd_sdma_destroy = hqd_sdma_destroy_v11,
	.wave_control_execute = wave_control_execute_v11,
	.get_atc_vmid_pasid_mapping_info = NULL,
	.set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
};