cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gfx_v11_0.c (201480B)


      1/*
      2 * Copyright 2021 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 */
     23#include <linux/delay.h>
     24#include <linux/kernel.h>
     25#include <linux/firmware.h>
     26#include <linux/module.h>
     27#include <linux/pci.h>
     28#include "amdgpu.h"
     29#include "amdgpu_gfx.h"
     30#include "amdgpu_psp.h"
     31#include "amdgpu_smu.h"
     32#include "amdgpu_atomfirmware.h"
     33#include "imu_v11_0.h"
     34#include "soc21.h"
     35#include "nvd.h"
     36
     37#include "gc/gc_11_0_0_offset.h"
     38#include "gc/gc_11_0_0_sh_mask.h"
     39#include "smuio/smuio_13_0_6_offset.h"
     40#include "smuio/smuio_13_0_6_sh_mask.h"
     41#include "navi10_enum.h"
     42#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
     43
     44#include "soc15.h"
     45#include "soc15d.h"
     46#include "clearstate_gfx11.h"
     47#include "v11_structs.h"
     48#include "gfx_v11_0.h"
     49#include "nbio_v4_3.h"
     50#include "mes_v11_0.h"
     51
     52#define GFX11_NUM_GFX_RINGS		1
     53#define GFX11_MEC_HPD_SIZE	2048
     54
     55#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
     56
     57#define regCGTT_WD_CLK_CTRL		0x5086
     58#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
     59
     60MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
     61MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
     62MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
     63MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
     64MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
     65MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
     66MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
     67MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
     68MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
     69MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
     70MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
     71MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
     72MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
     73
     74static const struct soc15_reg_golden golden_settings_gc_11_0[] =
     75{
     76	/* Pending on emulation bring up */
     77};
     78
     79static const struct soc15_reg_golden golden_settings_gc_11_0_0[] =
     80{
     81	/* Pending on emulation bring up */
     82};
     83
     84static const struct soc15_reg_golden golden_settings_gc_rlc_spm_11_0[] =
     85{
     86	/* Pending on emulation bring up */
     87};
     88
     89static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
     90{
     91	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
     92	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
     93	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
     94	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
     95	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
     96	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
     97	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
     98	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
     99	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
    100};
    101
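       /*
        * Default SH_MEM_CONFIG value: 64-bit address mode, unaligned
        * alignment mode, and an initial instruction prefetch setting of 3.
        */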
    102#define DEFAULT_SH_MEM_CONFIG \
    103	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
    104	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
    105	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
    106
    107static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
    108static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
    109static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
    110static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
    111static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
    112static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
    113static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
    114static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
    115                                 struct amdgpu_cu_info *cu_info);
    116static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
    117static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
    118				   u32 sh_num, u32 instance);
    119static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
    120
    121static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
    122static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
    123static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
    124				     uint32_t val);
    125static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
    126static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
    127					   uint16_t pasid, uint32_t flush_type,
    128					   bool all_hub, uint8_t dst_sel);
    129
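       /*
        * Emit a PM4 SET_RESOURCES packet on the KIQ ring.  queue_mask tells
        * the CP which compute queue slots the driver owns; the GWS, OAC and
        * GDS resource fields are all left at zero here.
        */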
    130static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
    131{
    132	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
    133	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
    134			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
    135	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
    136	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
    137	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
    138	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
    139	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
    140	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
    141}
    142
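       /*
        * Emit a MAP_QUEUES packet for @ring on the KIQ.  The ME and engine
        * select come from the ring type (compute: me 1/eng 0, gfx: me 0/eng 4,
        * MES: me 2/eng 5); the packet also carries the ring's doorbell offset,
        * MQD GPU address and wptr writeback address.
        */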
    143static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
    144				 struct amdgpu_ring *ring)
    145{
    146	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
    147	uint64_t wptr_addr = ring->wptr_gpu_addr;
    148	uint32_t me = 0, eng_sel = 0;
    149
    150	switch (ring->funcs->type) {
    151	case AMDGPU_RING_TYPE_COMPUTE:
    152		me = 1;
    153		eng_sel = 0;
    154		break;
    155	case AMDGPU_RING_TYPE_GFX:
    156		me = 0;
    157		eng_sel = 4;
    158		break;
    159	case AMDGPU_RING_TYPE_MES:
    160		me = 2;
    161		eng_sel = 5;
    162		break;
    163	default:
    164		WARN_ON(1);
    165	}
    166
    167	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
    168	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
    169	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
    170			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
    171			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
    172			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
    173			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
    174			  PACKET3_MAP_QUEUES_ME((me)) |
    175			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
    176			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
    177			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
    178			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
    179	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
    180	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
    181	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
    182	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
    183	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
    184}
    185
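       /*
        * Emit an UNMAP_QUEUES packet for @ring.  If MES is enabled but the KIQ
        * ring scheduler is not ready, the unmap is routed through
        * amdgpu_mes_unmap_legacy_queue() instead.  For PREEMPT_QUEUES_NO_UNMAP
        * the trailing dwords carry the caller's gpu_addr and seq; otherwise
        * they are zeroed.
        */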
    186static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
    187				   struct amdgpu_ring *ring,
    188				   enum amdgpu_unmap_queues_action action,
    189				   u64 gpu_addr, u64 seq)
    190{
    191	struct amdgpu_device *adev = kiq_ring->adev;
    192	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
    193
    194	if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
    195		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
    196		return;
    197	}
    198
    199	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
    200	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
    201			  PACKET3_UNMAP_QUEUES_ACTION(action) |
    202			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
    203			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
    204			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
    205	amdgpu_ring_write(kiq_ring,
    206		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
    207
    208	if (action == PREEMPT_QUEUES_NO_UNMAP) {
    209		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
    210		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
    211		amdgpu_ring_write(kiq_ring, seq);
    212	} else {
    213		amdgpu_ring_write(kiq_ring, 0);
    214		amdgpu_ring_write(kiq_ring, 0);
    215		amdgpu_ring_write(kiq_ring, 0);
    216	}
    217}
    218
    219static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
    220				   struct amdgpu_ring *ring,
    221				   u64 addr,
    222				   u64 seq)
    223{
    224	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
    225
    226	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
    227	amdgpu_ring_write(kiq_ring,
    228			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
    229			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
    230			  PACKET3_QUERY_STATUS_COMMAND(2));
    231	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
    232			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
    233			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
    234	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
    235	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
    236	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
    237	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
    238}
    239
    240static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
    241				uint16_t pasid, uint32_t flush_type,
    242				bool all_hub)
    243{
    244	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
    245}
    246
    247static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
    248	.kiq_set_resources = gfx11_kiq_set_resources,
    249	.kiq_map_queues = gfx11_kiq_map_queues,
    250	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
    251	.kiq_query_status = gfx11_kiq_query_status,
    252	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
    253	.set_resources_size = 8,
    254	.map_queues_size = 7,
    255	.unmap_queues_size = 6,
    256	.query_status_size = 7,
    257	.invalidate_tlbs_size = 2,
    258};
    259
    260static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
    261{
    262	adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
    263}
    264
    265static void gfx_v11_0_init_spm_golden_registers(struct amdgpu_device *adev)
    266{
    267	switch (adev->ip_versions[GC_HWIP][0]) {
    268	case IP_VERSION(11, 0, 0):
    269		soc15_program_register_sequence(adev,
    270						golden_settings_gc_rlc_spm_11_0,
    271						(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_11_0));
    272		break;
    273	default:
    274		break;
    275	}
    276}
    277
    278static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
    279{
    280	switch (adev->ip_versions[GC_HWIP][0]) {
    281	case IP_VERSION(11, 0, 0):
    282		soc15_program_register_sequence(adev,
    283						golden_settings_gc_11_0,
    284						(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
    285		soc15_program_register_sequence(adev,
    286						golden_settings_gc_11_0_0,
    287						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_0));
    288		break;
    289	case IP_VERSION(11, 0, 1):
    290		soc15_program_register_sequence(adev,
    291						golden_settings_gc_11_0,
    292						(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
    293		soc15_program_register_sequence(adev,
    294						golden_settings_gc_11_0_1,
    295						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
    296		break;
    297	default:
    298		break;
    299	}
    300	gfx_v11_0_init_spm_golden_registers(adev);
    301}
    302
    303static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
    304				       bool wc, uint32_t reg, uint32_t val)
    305{
    306	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
    307	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
    308			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
    309	amdgpu_ring_write(ring, reg);
    310	amdgpu_ring_write(ring, 0);
    311	amdgpu_ring_write(ring, val);
    312}
    313
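       /*
        * Emit a WAIT_REG_MEM packet: poll a register (mem_space == 0) or a
        * dword-aligned memory location (mem_space == 1) until
        * (value & mask) == ref, using the "equal" compare function.  @inv is
        * the poll interval.
        */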
    314static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
    315				  int mem_space, int opt, uint32_t addr0,
    316				  uint32_t addr1, uint32_t ref, uint32_t mask,
    317				  uint32_t inv)
    318{
    319	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
    320	amdgpu_ring_write(ring,
    321			  /* memory (1) or register (0) */
    322			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
    323			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
    324			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
    325			   WAIT_REG_MEM_ENGINE(eng_sel)));
    326
    327	if (mem_space)
    328		BUG_ON(addr0 & 0x3); /* Dword align */
    329	amdgpu_ring_write(ring, addr0);
    330	amdgpu_ring_write(ring, addr1);
    331	amdgpu_ring_write(ring, ref);
    332	amdgpu_ring_write(ring, mask);
    333	amdgpu_ring_write(ring, inv); /* poll interval */
    334}
    335
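       /*
        * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a packet
        * that writes 0xDEADBEEF to the same register (gfx_v11_0_ring_emit_wreg()
        * on the KIQ, SET_UCONFIG_REG on other rings), then poll until the new
        * value reads back or the usec timeout expires.
        */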
    336static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
    337{
    338	struct amdgpu_device *adev = ring->adev;
    339	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
    340	uint32_t tmp = 0;
    341	unsigned i;
    342	int r;
    343
    344	WREG32(scratch, 0xCAFEDEAD);
    345	r = amdgpu_ring_alloc(ring, 5);
    346	if (r) {
    347		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
    348			  ring->idx, r);
    349		return r;
    350	}
    351
    352	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
    353		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
    354	} else {
    355		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
    356		amdgpu_ring_write(ring, scratch -
    357				  PACKET3_SET_UCONFIG_REG_START);
    358		amdgpu_ring_write(ring, 0xDEADBEEF);
    359	}
    360	amdgpu_ring_commit(ring);
    361
    362	for (i = 0; i < adev->usec_timeout; i++) {
    363		tmp = RREG32(scratch);
    364		if (tmp == 0xDEADBEEF)
    365			break;
    366		if (amdgpu_emu_mode == 1)
    367			msleep(1);
    368		else
    369			udelay(1);
    370	}
    371
    372	if (i >= adev->usec_timeout)
    373		r = -ETIMEDOUT;
    374	return r;
    375}
    376
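       /*
        * IB test: build a small indirect buffer with a WRITE_DATA packet that
        * stores 0xDEADBEEF to a writeback slot (or the MES context padding
        * area for MES queues), schedule it, wait for the fence and verify the
        * value landed.  MES KIQ rings are skipped per the comment below.
        */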
    377static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
    378{
    379	struct amdgpu_device *adev = ring->adev;
    380	struct amdgpu_ib ib;
    381	struct dma_fence *f = NULL;
    382	unsigned index;
    383	uint64_t gpu_addr;
    384	volatile uint32_t *cpu_ptr;
    385	long r;
    386
    387	/* MES KIQ fw doesn't have indirect buffer support for now */
    388	if (adev->enable_mes_kiq &&
    389	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
    390		return 0;
    391
    392	memset(&ib, 0, sizeof(ib));
    393
    394	if (ring->is_mes_queue) {
    395		uint32_t padding, offset;
    396
    397		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
    398		padding = amdgpu_mes_ctx_get_offs(ring,
    399						  AMDGPU_MES_CTX_PADDING_OFFS);
    400
    401		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
    402		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
    403
    404		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
    405		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
    406		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
    407	} else {
    408		r = amdgpu_device_wb_get(adev, &index);
    409		if (r)
    410			return r;
    411
    412		gpu_addr = adev->wb.gpu_addr + (index * 4);
    413		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
    414		cpu_ptr = &adev->wb.wb[index];
    415
    416		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
    417		if (r) {
    418			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
    419			goto err1;
    420		}
    421	}
    422
    423	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
    424	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
    425	ib.ptr[2] = lower_32_bits(gpu_addr);
    426	ib.ptr[3] = upper_32_bits(gpu_addr);
    427	ib.ptr[4] = 0xDEADBEEF;
    428	ib.length_dw = 5;
    429
    430	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
    431	if (r)
    432		goto err2;
    433
    434	r = dma_fence_wait_timeout(f, false, timeout);
    435	if (r == 0) {
    436		r = -ETIMEDOUT;
    437		goto err2;
    438	} else if (r < 0) {
    439		goto err2;
    440	}
    441
    442	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
    443		r = 0;
    444	else
    445		r = -EINVAL;
    446err2:
    447	if (!ring->is_mes_queue)
    448		amdgpu_ib_free(adev, &ib, NULL);
    449	dma_fence_put(f);
    450err1:
    451	if (!ring->is_mes_queue)
    452		amdgpu_device_wb_free(adev, index);
    453	return r;
    454}
    455
    456static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
    457{
    458	release_firmware(adev->gfx.pfp_fw);
    459	adev->gfx.pfp_fw = NULL;
    460	release_firmware(adev->gfx.me_fw);
    461	adev->gfx.me_fw = NULL;
    462	release_firmware(adev->gfx.rlc_fw);
    463	adev->gfx.rlc_fw = NULL;
    464	release_firmware(adev->gfx.mec_fw);
    465	adev->gfx.mec_fw = NULL;
    466
    467	kfree(adev->gfx.rlc.register_list_format);
    468}
    469
    470static void gfx_v11_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
    471{
    472	const struct rlc_firmware_header_v2_1 *rlc_hdr;
    473
    474	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
    475	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
    476	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
    477	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
    478	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
    479	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
    480	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
    481	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
    482	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
    483	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
    484	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
    485	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
    486	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
    487	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
    488			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
    489}
    490
    491static void gfx_v11_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
    492{
    493	const struct rlc_firmware_header_v2_2 *rlc_hdr;
    494
    495	rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
    496	adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
    497	adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
    498	adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
    499	adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
    500}
    501
    502static void gfx_v11_0_init_rlcp_rlcv_microcode(struct amdgpu_device *adev)
    503{
    504	const struct rlc_firmware_header_v2_3 *rlc_hdr;
    505
    506	rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
    507	adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
    508	adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
    509	adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
    510	adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
    511}
    512
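       /*
        * Request and validate the PFP, ME, RLC and MEC firmware images for
        * the detected GC IP version.  A v2.0 PFP header selects the RS64 CP
        * path, which uses a different ucode layout (separate instruction and
        * per-pipe stack sections).  With PSP firmware loading, each section
        * is also registered in adev->firmware.ucode[].
        */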
    513static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
    514{
    515	char fw_name[40];
    516	char ucode_prefix[30];
    517	int err;
    518	struct amdgpu_firmware_info *info = NULL;
    519	const struct common_firmware_header *header = NULL;
    520	const struct gfx_firmware_header_v1_0 *cp_hdr;
    521	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
    522	const struct rlc_firmware_header_v2_0 *rlc_hdr;
    523	unsigned int *tmp = NULL;
    524	unsigned int i = 0;
    525	uint16_t version_major;
    526	uint16_t version_minor;
    527
    528	DRM_DEBUG("\n");
    529
    530	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
    531
    532	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
    533	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
    534	if (err)
    535		goto out;
    536	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
    537	if (err)
    538		goto out;
    539	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
    540	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
    541				(union amdgpu_firmware_header *)
    542				adev->gfx.pfp_fw->data, 2, 0);
    543	if (adev->gfx.rs64_enable) {
    544		dev_info(adev->dev, "CP RS64 enable\n");
    545		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
    546		adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
    547		adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
    548
    549	} else {
    550		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
    551		adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    552		adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
    553	}
    554
    555	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
    556	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
    557	if (err)
    558		goto out;
    559	err = amdgpu_ucode_validate(adev->gfx.me_fw);
    560	if (err)
    561		goto out;
    562	if (adev->gfx.rs64_enable) {
    563		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
    564		adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
    565		adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
    566
    567	} else {
    568		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
    569		adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    570		adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
    571	}
    572
    573	if (!amdgpu_sriov_vf(adev)) {
    574		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
    575		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
    576		if (err)
    577			goto out;
    578		err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
    579		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
    580		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
    581		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
    582
    583		adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
    584		adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
    585		adev->gfx.rlc.save_and_restore_offset =
    586			le32_to_cpu(rlc_hdr->save_and_restore_offset);
    587		adev->gfx.rlc.clear_state_descriptor_offset =
    588			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
    589		adev->gfx.rlc.avail_scratch_ram_locations =
    590			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
    591		adev->gfx.rlc.reg_restore_list_size =
    592			le32_to_cpu(rlc_hdr->reg_restore_list_size);
    593		adev->gfx.rlc.reg_list_format_start =
    594			le32_to_cpu(rlc_hdr->reg_list_format_start);
    595		adev->gfx.rlc.reg_list_format_separate_start =
    596			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
    597		adev->gfx.rlc.starting_offsets_start =
    598			le32_to_cpu(rlc_hdr->starting_offsets_start);
    599		adev->gfx.rlc.reg_list_format_size_bytes =
    600			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
    601		adev->gfx.rlc.reg_list_size_bytes =
    602			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
    603		adev->gfx.rlc.register_list_format =
    604			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
    605					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
    606		if (!adev->gfx.rlc.register_list_format) {
    607			err = -ENOMEM;
    608			goto out;
    609		}
    610
    611		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
    612							   le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
    613		for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
    614			adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
    615
    616		adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
    617
    618		tmp = (unsigned int *)((uintptr_t)rlc_hdr +
    619							   le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
    620		for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
    621			adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
    622
    623		if (version_major == 2) {
    624			if (version_minor >= 1)
    625				gfx_v11_0_init_rlc_ext_microcode(adev);
    626			if (version_minor >= 2)
    627				gfx_v11_0_init_rlc_iram_dram_microcode(adev);
    628			if (version_minor == 3)
    629				gfx_v11_0_init_rlcp_rlcv_microcode(adev);
    630		}
    631	}
    632
    633	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
    634	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
    635	if (err)
    636		goto out;
    637	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
    638	if (err)
    639		goto out;
    640	if (adev->gfx.rs64_enable) {
    641		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
    642		adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
    643		adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
    644
    645	} else {
    646		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
    647		adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    648		adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
    649	}
    650
    651	/* only one MEC for gfx 11.0.0. */
    652	adev->gfx.mec2_fw = NULL;
    653
    654	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    655		if (adev->gfx.rs64_enable) {
    656			cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
    657			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP];
    658			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP;
    659			info->fw = adev->gfx.pfp_fw;
    660			header = (const struct common_firmware_header *)info->fw->data;
    661			adev->firmware.fw_size +=
    662				ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
    663
    664			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK];
    665			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK;
    666			info->fw = adev->gfx.pfp_fw;
    667			header = (const struct common_firmware_header *)info->fw->data;
    668			adev->firmware.fw_size +=
    669				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    670
    671			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK];
    672			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK;
    673			info->fw = adev->gfx.pfp_fw;
    674			header = (const struct common_firmware_header *)info->fw->data;
    675			adev->firmware.fw_size +=
    676				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    677
    678			cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
    679			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME];
    680			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME;
    681			info->fw = adev->gfx.me_fw;
    682			header = (const struct common_firmware_header *)info->fw->data;
    683			adev->firmware.fw_size +=
    684				ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
    685
    686			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK];
    687			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK;
    688			info->fw = adev->gfx.me_fw;
    689			header = (const struct common_firmware_header *)info->fw->data;
    690			adev->firmware.fw_size +=
    691				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    692
    693			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK];
    694			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK;
    695			info->fw = adev->gfx.me_fw;
    696			header = (const struct common_firmware_header *)info->fw->data;
    697			adev->firmware.fw_size +=
    698				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    699
    700			cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
    701			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC];
    702			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC;
    703			info->fw = adev->gfx.mec_fw;
    704			header = (const struct common_firmware_header *)info->fw->data;
    705			adev->firmware.fw_size +=
    706				ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
    707
    708			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK];
    709			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK;
    710			info->fw = adev->gfx.mec_fw;
    711			header = (const struct common_firmware_header *)info->fw->data;
    712			adev->firmware.fw_size +=
    713				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    714
    715			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK];
    716			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK;
    717			info->fw = adev->gfx.mec_fw;
    718			header = (const struct common_firmware_header *)info->fw->data;
    719			adev->firmware.fw_size +=
    720				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    721
    722			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK];
    723			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK;
    724			info->fw = adev->gfx.mec_fw;
    725			header = (const struct common_firmware_header *)info->fw->data;
    726			adev->firmware.fw_size +=
    727				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    728
    729			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK];
    730			info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK;
    731			info->fw = adev->gfx.mec_fw;
    732			header = (const struct common_firmware_header *)info->fw->data;
    733			adev->firmware.fw_size +=
    734				ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
    735		} else {
    736			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
    737			info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
    738			info->fw = adev->gfx.pfp_fw;
    739			header = (const struct common_firmware_header *)info->fw->data;
    740			adev->firmware.fw_size +=
    741				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
    742
    743			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
    744			info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
    745			info->fw = adev->gfx.me_fw;
    746			header = (const struct common_firmware_header *)info->fw->data;
    747			adev->firmware.fw_size +=
    748				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
    749
    750			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
    751			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
    752			info->fw = adev->gfx.mec_fw;
    753			header = (const struct common_firmware_header *)info->fw->data;
    754			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
    755			adev->firmware.fw_size +=
    756				ALIGN(le32_to_cpu(header->ucode_size_bytes) -
    757				      le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
    758
    759			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
    760			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
    761			info->fw = adev->gfx.mec_fw;
    762			adev->firmware.fw_size +=
    763				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
    764		}
    765
    766		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
    767		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
    768		info->fw = adev->gfx.rlc_fw;
    769		if (info->fw) {
    770			header = (const struct common_firmware_header *)info->fw->data;
    771			adev->firmware.fw_size +=
    772				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
    773		}
    774		if (adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
    775		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
    776			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
    777			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
    778			info->fw = adev->gfx.rlc_fw;
    779			adev->firmware.fw_size +=
    780				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
    781
    782			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
    783			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
    784			info->fw = adev->gfx.rlc_fw;
    785			adev->firmware.fw_size +=
    786				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
    787		}
    788
    789		if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
    790		    adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
    791			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
    792			info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
    793			info->fw = adev->gfx.rlc_fw;
    794			adev->firmware.fw_size +=
    795				ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
    796
    797			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
    798			info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
    799			info->fw = adev->gfx.rlc_fw;
    800			adev->firmware.fw_size +=
    801				ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
    802		}
    803
    804		if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
    805			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
    806			info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
    807			info->fw = adev->gfx.rlc_fw;
    808			adev->firmware.fw_size +=
    809				ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
    810		}
    811
    812		if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
    813			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
    814			info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
    815			info->fw = adev->gfx.rlc_fw;
    816			adev->firmware.fw_size +=
    817				ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
    818		}
    819	}
    820
    821out:
    822	if (err) {
    823		dev_err(adev->dev,
    824			"gfx11: Failed to load firmware \"%s\"\n",
    825			fw_name);
    826		release_firmware(adev->gfx.pfp_fw);
    827		adev->gfx.pfp_fw = NULL;
    828		release_firmware(adev->gfx.me_fw);
    829		adev->gfx.me_fw = NULL;
    830		release_firmware(adev->gfx.rlc_fw);
    831		adev->gfx.rlc_fw = NULL;
    832		release_firmware(adev->gfx.mec_fw);
    833		adev->gfx.mec_fw = NULL;
    834	}
    835
    836	return err;
    837}
    838
    839static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
    840{
    841	const struct psp_firmware_header_v1_0 *toc_hdr;
    842	int err = 0;
    843	char fw_name[40];
    844	char ucode_prefix[30];
    845
    846	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
    847
    848	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
    849	err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
    850	if (err)
    851		goto out;
    852
    853	err = amdgpu_ucode_validate(adev->psp.toc_fw);
    854	if (err)
    855		goto out;
    856
    857	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
    858	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
    859	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
    860	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
    861	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
    862				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
    863	return 0;
    864out:
    865	dev_err(adev->dev, "Failed to load TOC microcode\n");
    866	release_firmware(adev->psp.toc_fw);
    867	adev->psp.toc_fw = NULL;
    868	return err;
    869}
    870
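       /*
        * Size (in dwords) of the clear-state buffer produced by
        * gfx_v11_0_get_csb_buffer() below: clear-state preamble begin/end,
        * context control, one SET_CONTEXT_REG run per extent in gfx11_cs_data,
        * the PA_SC_TILE_STEERING_OVERRIDE write and a trailing CLEAR_STATE.
        */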
    871static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
    872{
    873	u32 count = 0;
    874	const struct cs_section_def *sect = NULL;
    875	const struct cs_extent_def *ext = NULL;
    876
    877	/* begin clear state */
    878	count += 2;
    879	/* context control state */
    880	count += 3;
    881
    882	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
    883		for (ext = sect->section; ext->extent != NULL; ++ext) {
    884			if (sect->id == SECT_CONTEXT)
    885				count += 2 + ext->reg_count;
    886			else
    887				return 0;
    888		}
    889	}
    890
    891	/* set PA_SC_TILE_STEERING_OVERRIDE */
    892	count += 3;
    893	/* end clear state */
    894	count += 2;
    895	/* clear state */
    896	count += 2;
    897
    898	return count;
    899}
    900
    901static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
    902				    volatile u32 *buffer)
    903{
    904	u32 count = 0, i;
    905	const struct cs_section_def *sect = NULL;
    906	const struct cs_extent_def *ext = NULL;
    907	int ctx_reg_offset;
    908
    909	if (adev->gfx.rlc.cs_data == NULL)
    910		return;
    911	if (buffer == NULL)
    912		return;
    913
    914	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
    915	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
    916
    917	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
    918	buffer[count++] = cpu_to_le32(0x80000000);
    919	buffer[count++] = cpu_to_le32(0x80000000);
    920
    921	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
    922		for (ext = sect->section; ext->extent != NULL; ++ext) {
    923			if (sect->id == SECT_CONTEXT) {
    924				buffer[count++] =
    925					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
    926				buffer[count++] = cpu_to_le32(ext->reg_index -
    927						PACKET3_SET_CONTEXT_REG_START);
    928				for (i = 0; i < ext->reg_count; i++)
    929					buffer[count++] = cpu_to_le32(ext->extent[i]);
    930			} else {
    931				return;
    932			}
    933		}
    934	}
    935
    936	ctx_reg_offset =
    937		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
    938	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
    939	buffer[count++] = cpu_to_le32(ctx_reg_offset);
    940	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
    941
    942	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
    943	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
    944
    945	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
    946	buffer[count++] = cpu_to_le32(0);
    947}
    948
    949static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
    950{
    951	/* clear state block */
    952	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
    953			&adev->gfx.rlc.clear_state_gpu_addr,
    954			(void **)&adev->gfx.rlc.cs_ptr);
    955
    956	/* jump table block */
    957	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
    958			&adev->gfx.rlc.cp_table_gpu_addr,
    959			(void **)&adev->gfx.rlc.cp_table_ptr);
    960}
    961
    962static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
    963{
    964	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
    965
    966	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
    967	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
    968	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
    969	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
    970	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
    971	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
    972	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
    973	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
    974	adev->gfx.rlc.rlcg_reg_access_supported = true;
    975}
    976
    977static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
    978{
    979	const struct cs_section_def *cs_data;
    980	int r;
    981
    982	adev->gfx.rlc.cs_data = gfx11_cs_data;
    983
    984	cs_data = adev->gfx.rlc.cs_data;
    985
    986	if (cs_data) {
    987		/* init clear state block */
    988		r = amdgpu_gfx_rlc_init_csb(adev);
    989		if (r)
    990			return r;
    991	}
    992
    993	/* init spm vmid with 0xf */
    994	if (adev->gfx.rlc.funcs->update_spm_vmid)
    995		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
    996
    997	return 0;
    998}
    999
   1000static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
   1001{
   1002	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
   1003	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
   1004	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
   1005}
   1006
   1007static int gfx_v11_0_me_init(struct amdgpu_device *adev)
   1008{
   1009	int r;
   1010
   1011	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
   1012
   1013	amdgpu_gfx_graphics_queue_acquire(adev);
   1014
   1015	r = gfx_v11_0_init_microcode(adev);
   1016	if (r)
   1017		DRM_ERROR("Failed to load gfx firmware!\n");
   1018
   1019	return r;
   1020}
   1021
   1022static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
   1023{
   1024	int r;
   1025	u32 *hpd;
   1026	size_t mec_hpd_size;
   1027
   1028	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
   1029
   1030	/* take ownership of the relevant compute queues */
   1031	amdgpu_gfx_compute_queue_acquire(adev);
   1032	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
   1033
   1034	if (mec_hpd_size) {
   1035		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
   1036					      AMDGPU_GEM_DOMAIN_GTT,
   1037					      &adev->gfx.mec.hpd_eop_obj,
   1038					      &adev->gfx.mec.hpd_eop_gpu_addr,
   1039					      (void **)&hpd);
   1040		if (r) {
   1041			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
   1042			gfx_v11_0_mec_fini(adev);
   1043			return r;
   1044		}
   1045
   1046		memset(hpd, 0, mec_hpd_size);
   1047
   1048		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
   1049		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
   1050	}
   1051
   1052	return 0;
   1053}
   1054
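       /*
        * SQ indirect register access: program SQ_IND_INDEX with the wave id
        * (plus thread/regno and auto-increment for bulk reads) and fetch the
        * data through SQ_IND_DATA.  Used by the wave debug callbacks below.
        */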
   1055static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
   1056{
   1057	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
   1058		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   1059		(address << SQ_IND_INDEX__INDEX__SHIFT));
   1060	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
   1061}
   1062
   1063static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
   1064			   uint32_t thread, uint32_t regno,
   1065			   uint32_t num, uint32_t *out)
   1066{
   1067	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
   1068		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   1069		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
   1070		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
   1071		(SQ_IND_INDEX__AUTO_INCR_MASK));
   1072	while (num--)
   1073		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
   1074}
   1075
   1076static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
   1077{
   1078	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
   1079	 * field when performing a select_se_sh so it should be
   1080	 * zero here */
   1081	WARN_ON(simd != 0);
   1082
   1083	/* type 2 wave data */
   1084	dst[(*no_fields)++] = 2;
   1085	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
   1086	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
   1087	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
   1088	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
   1089	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
   1090	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
   1091	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
   1092	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
   1093	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
   1094	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
   1095	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
   1096	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
   1097	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
   1098	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
   1099	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
   1100}
   1101
   1102static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
   1103				     uint32_t wave, uint32_t start,
   1104				     uint32_t size, uint32_t *dst)
   1105{
   1106	WARN_ON(simd != 0);
   1107
   1108	wave_read_regs(
   1109		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
   1110		dst);
   1111}
   1112
   1113static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
   1114				      uint32_t wave, uint32_t thread,
   1115				      uint32_t start, uint32_t size,
   1116				      uint32_t *dst)
   1117{
   1118	wave_read_regs(
   1119		adev, wave, thread,
   1120		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
   1121}
   1122
   1123static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
   1124									  u32 me, u32 pipe, u32 q, u32 vm)
   1125{
   1126	soc21_grbm_select(adev, me, pipe, q, vm);
   1127}
   1128
   1129static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
   1130	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
   1131	.select_se_sh = &gfx_v11_0_select_se_sh,
   1132	.read_wave_data = &gfx_v11_0_read_wave_data,
   1133	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
   1134	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
   1135	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
   1136	.init_spm_golden = &gfx_v11_0_init_spm_golden_registers,
   1137};
   1138
   1139static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
   1140{
   1141	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
   1142
   1143	switch (adev->ip_versions[GC_HWIP][0]) {
   1144	case IP_VERSION(11, 0, 0):
   1145	case IP_VERSION(11, 0, 2):
   1146		adev->gfx.config.max_hw_contexts = 8;
   1147		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1148		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1149		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
   1150		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   1151		break;
   1152	case IP_VERSION(11, 0, 1):
   1153		adev->gfx.config.max_hw_contexts = 8;
   1154		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1155		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1156		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
   1157		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
   1158		break;
   1159	default:
   1160		BUG();
   1161		break;
   1162	}
   1163
   1164	return 0;
   1165}
   1166
   1167static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
   1168				   int me, int pipe, int queue)
   1169{
   1170	int r;
   1171	struct amdgpu_ring *ring;
   1172	unsigned int irq_type;
   1173
   1174	ring = &adev->gfx.gfx_ring[ring_id];
   1175
   1176	ring->me = me;
   1177	ring->pipe = pipe;
   1178	ring->queue = queue;
   1179
   1180	ring->ring_obj = NULL;
   1181	ring->use_doorbell = true;
   1182
   1183	if (!ring_id)
   1184		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
   1185	else
   1186		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
   1187	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
   1188
   1189	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
   1190	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
   1191			     AMDGPU_RING_PRIO_DEFAULT, NULL);
   1192	if (r)
   1193		return r;
   1194	return 0;
   1195}
   1196
   1197static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
   1198				       int mec, int pipe, int queue)
   1199{
   1200	int r;
   1201	unsigned irq_type;
   1202	struct amdgpu_ring *ring;
   1203	unsigned int hw_prio;
   1204
   1205	ring = &adev->gfx.compute_ring[ring_id];
   1206
   1207	/* mec0 is me1 */
   1208	ring->me = mec + 1;
   1209	ring->pipe = pipe;
   1210	ring->queue = queue;
   1211
   1212	ring->ring_obj = NULL;
   1213	ring->use_doorbell = true;
   1214	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
   1215	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
   1216				+ (ring_id * GFX11_MEC_HPD_SIZE);
   1217	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
   1218
   1219	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
   1220		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
   1221		+ ring->pipe;
   1222	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
   1223			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
   1224	/* type-2 packets are deprecated on MEC, use type-3 instead */
   1225	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
   1226			     hw_prio, NULL);
   1227	if (r)
   1228		return r;
   1229
   1230	return 0;
   1231}
   1232
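       /*
        * RLC backdoor-autoload bookkeeping: gfx_v11_0_parse_rlc_toc() walks
        * the RLC table of contents from the TOC firmware and records each
        * entry's offset and size (stored as dwords in the TOC, converted to
        * bytes here) so the autoload buffer can be sized and filled later.
        */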
   1233static struct {
   1234	SOC21_FIRMWARE_ID	id;
   1235	unsigned int		offset;
   1236	unsigned int		size;
   1237} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
   1238
   1239static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
   1240{
   1241	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
   1242
   1243	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
   1244			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
   1245		rlc_autoload_info[ucode->id].id = ucode->id;
   1246		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
   1247		rlc_autoload_info[ucode->id].size = ucode->size * 4;
   1248
   1249		ucode++;
   1250	}
   1251}
   1252
   1253static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
   1254{
   1255	uint32_t total_size = 0;
   1256	SOC21_FIRMWARE_ID id;
   1257
   1258	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
   1259
   1260	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
   1261		total_size += rlc_autoload_info[id].size;
   1262
   1263	/* In case the offsets in the rlc toc ucode are aligned */
   1264	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
   1265		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
   1266			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
   1267
   1268	return total_size;
   1269}
   1270
   1271static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
   1272{
   1273	int r;
   1274	uint32_t total_size;
   1275
   1276	total_size = gfx_v11_0_calc_toc_total_size(adev);
   1277
   1278	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
   1279			AMDGPU_GEM_DOMAIN_VRAM,
   1280			&adev->gfx.rlc.rlc_autoload_bo,
   1281			&adev->gfx.rlc.rlc_autoload_gpu_addr,
   1282			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
   1283
   1284	if (r) {
   1285		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
   1286		return r;
   1287	}
   1288
   1289	return 0;
   1290}
   1291
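       /*
        * Copy one firmware section into the RLC autoload buffer at the offset
        * recorded in the TOC, zero-padding up to the TOC entry size, and set
        * the matching bit in the autoload mask (the RS64 PFP and ME images
        * are not flagged here).
        */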
   1292static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
   1293					      SOC21_FIRMWARE_ID id,
   1294					      const void *fw_data,
   1295					      uint32_t fw_size,
   1296					      uint32_t *fw_autoload_mask)
   1297{
   1298	uint32_t toc_offset;
   1299	uint32_t toc_fw_size;
   1300	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
   1301
   1302	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
   1303		return;
   1304
   1305	toc_offset = rlc_autoload_info[id].offset;
   1306	toc_fw_size = rlc_autoload_info[id].size;
   1307
   1308	if (fw_size == 0)
   1309		fw_size = toc_fw_size;
   1310
   1311	if (fw_size > toc_fw_size)
   1312		fw_size = toc_fw_size;
   1313
   1314	memcpy(ptr + toc_offset, fw_data, fw_size);
   1315
   1316	if (fw_size < toc_fw_size)
   1317		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
   1318
   1319	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
   1320		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
   1321}
   1322
   1323static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
   1324							uint32_t *fw_autoload_mask)
   1325{
   1326	void *data;
   1327	uint32_t size;
   1328	uint64_t *toc_ptr;
   1329
   1330	*(uint64_t *)fw_autoload_mask |= 0x1;
   1331
   1332	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
   1333
   1334	data = adev->psp.toc.start_addr;
   1335	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
   1336
   1337	toc_ptr = (uint64_t *)data + size / 8 - 1;
   1338	*toc_ptr = *(uint64_t *)fw_autoload_mask;
   1339
   1340	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
   1341					data, size, fw_autoload_mask);
   1342}
   1343
   1344static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
   1345							uint32_t *fw_autoload_mask)
   1346{
   1347	const __le32 *fw_data;
   1348	uint32_t fw_size;
   1349	const struct gfx_firmware_header_v1_0 *cp_hdr;
   1350	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
   1351	const struct rlc_firmware_header_v2_0 *rlc_hdr;
   1352	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
   1353	uint16_t version_major, version_minor;
   1354
   1355	if (adev->gfx.rs64_enable) {
   1356		/* pfp ucode */
   1357		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
   1358			adev->gfx.pfp_fw->data;
   1359		/* instruction */
   1360		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
   1361			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
   1362		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
   1363		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
   1364						fw_data, fw_size, fw_autoload_mask);
   1365		/* data */
   1366		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
   1367			le32_to_cpu(cpv2_hdr->data_offset_bytes));
   1368		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
   1369		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
   1370						fw_data, fw_size, fw_autoload_mask);
   1371		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
   1372						fw_data, fw_size, fw_autoload_mask);
   1373		/* me ucode */
   1374		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
   1375			adev->gfx.me_fw->data;
   1376		/* instruction */
   1377		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
   1378			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
   1379		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
   1380		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
   1381						fw_data, fw_size, fw_autoload_mask);
   1382		/* data */
   1383		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
   1384			le32_to_cpu(cpv2_hdr->data_offset_bytes));
   1385		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
   1386		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
   1387						fw_data, fw_size, fw_autoload_mask);
   1388		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
   1389						fw_data, fw_size, fw_autoload_mask);
   1390		/* mec ucode */
   1391		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
   1392			adev->gfx.mec_fw->data;
   1393		/* instruction */
   1394		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
   1395			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
   1396		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
   1397		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
   1398						fw_data, fw_size, fw_autoload_mask);
   1399		/* data */
   1400		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
   1401			le32_to_cpu(cpv2_hdr->data_offset_bytes));
   1402		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
   1403		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
   1404						fw_data, fw_size, fw_autoload_mask);
   1405		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
   1406						fw_data, fw_size, fw_autoload_mask);
   1407		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
   1408						fw_data, fw_size, fw_autoload_mask);
   1409		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
   1410						fw_data, fw_size, fw_autoload_mask);
   1411	} else {
   1412		/* pfp ucode */
   1413		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
   1414			adev->gfx.pfp_fw->data;
   1415		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
   1416				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
   1417		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
   1418		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
   1419						fw_data, fw_size, fw_autoload_mask);
   1420
   1421		/* me ucode */
   1422		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
   1423			adev->gfx.me_fw->data;
   1424		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
   1425				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
   1426		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
   1427		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
   1428						fw_data, fw_size, fw_autoload_mask);
   1429
   1430		/* mec ucode */
   1431		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
   1432			adev->gfx.mec_fw->data;
   1433		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
   1434				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
   1435		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
   1436			cp_hdr->jt_size * 4;
   1437		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
   1438						fw_data, fw_size, fw_autoload_mask);
   1439	}
   1440
   1441	/* rlc ucode */
   1442	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
   1443		adev->gfx.rlc_fw->data;
   1444	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   1445			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
   1446	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
   1447	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
   1448					fw_data, fw_size, fw_autoload_mask);
   1449
   1450	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
   1451	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
   1452	if (version_major == 2) {
   1453		if (version_minor >= 2) {
   1454			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
   1455
   1456			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   1457					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
   1458			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
   1459			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
   1460					fw_data, fw_size, fw_autoload_mask);
   1461
   1462			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   1463					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
   1464			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
   1465			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
   1466					fw_data, fw_size, fw_autoload_mask);
   1467		}
   1468	}
   1469}
   1470
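/*
 * Stage the SDMA v2 firmware: thread 0 gets the context ucode and
 * thread 1 gets the control ucode, both taken from SDMA instance 0.
 */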
   1471static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
   1472							uint32_t *fw_autoload_mask)
   1473{
   1474	const __le32 *fw_data;
   1475	uint32_t fw_size;
   1476	const struct sdma_firmware_header_v2_0 *sdma_hdr;
   1477
   1478	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
   1479		adev->sdma.instance[0].fw->data;
   1480	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
   1481			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
   1482	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
   1483
   1484	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
   1485			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
   1486
   1487	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
   1488			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
   1489	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
   1490
   1491	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
   1492			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
   1493}
   1494
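/*
 * Stage the MES firmware for both pipes: each pipe's ucode and ucode
 * data images go into the matching RS64_MES_Pn / RS64_MES_Pn_STACK slots.
 */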
   1495static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
   1496							uint32_t *fw_autoload_mask)
   1497{
   1498	const __le32 *fw_data;
   1499	unsigned fw_size;
   1500	const struct mes_firmware_header_v1_0 *mes_hdr;
   1501	int pipe, ucode_id, data_id;
   1502
   1503	for (pipe = 0; pipe < 2; pipe++) {
    1504		if (pipe == 0) {
   1505			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
   1506			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
   1507		} else {
   1508			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
   1509			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
   1510		}
   1511
   1512		mes_hdr = (const struct mes_firmware_header_v1_0 *)
   1513			adev->mes.fw[pipe]->data;
   1514
   1515		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
   1516				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
   1517		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
   1518
   1519		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
   1520				ucode_id, fw_data, fw_size, fw_autoload_mask);
   1521
   1522		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
   1523				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
   1524		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
   1525
   1526		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
   1527				data_id, fw_data, fw_size, fw_autoload_mask);
   1528	}
   1529}
   1530
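/*
 * Kick off RLC backdoor autoload: stage all firmware into the autoload
 * buffer, point the RLC bootloader registers at the staged RLC_G image,
 * then load, set up and start the IMU, and finally disable GPA mode.
 */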
   1531static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
   1532{
   1533	uint32_t rlc_g_offset, rlc_g_size;
   1534	uint64_t gpu_addr;
   1535	uint32_t autoload_fw_id[2];
   1536
   1537	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
   1538
   1539	/* RLC autoload sequence 2: copy ucode */
   1540	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
   1541	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
   1542	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
   1543	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
   1544
   1545	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
   1546	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
   1547	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
   1548
   1549	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
   1550	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
   1551
   1552	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
   1553
   1554	/* RLC autoload sequence 3: load IMU fw */
   1555	if (adev->gfx.imu.funcs->load_microcode)
   1556		adev->gfx.imu.funcs->load_microcode(adev);
    1557	/* RLC autoload sequence 4: init IMU fw */
   1558	if (adev->gfx.imu.funcs->setup_imu)
   1559		adev->gfx.imu.funcs->setup_imu(adev);
   1560	if (adev->gfx.imu.funcs->start_imu)
   1561		adev->gfx.imu.funcs->start_imu(adev);
   1562
    1563	/* RLC autoload sequence 5: disable gpa mode */
   1564	gfx_v11_0_disable_gpa_mode(adev);
   1565
   1566	return 0;
   1567}
   1568
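/*
 * Software init: pick the ME/MEC topology for the GC IP revision, hook up
 * the CP EOP and privileged fault interrupts, initialize the IMU/ME/RLC/MEC
 * state and the gfx/compute rings, and set up KIQ plus the MQD backing
 * store. For RLC backdoor autoload the TOC firmware and the autoload
 * buffer are allocated here as well.
 */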
   1569static int gfx_v11_0_sw_init(void *handle)
   1570{
   1571	int i, j, k, r, ring_id = 0;
   1572	struct amdgpu_kiq *kiq;
   1573	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1574
   1575	adev->gfxhub.funcs->init(adev);
   1576
   1577	switch (adev->ip_versions[GC_HWIP][0]) {
   1578	case IP_VERSION(11, 0, 0):
   1579	case IP_VERSION(11, 0, 1):
   1580	case IP_VERSION(11, 0, 2):
   1581		adev->gfx.me.num_me = 1;
   1582		adev->gfx.me.num_pipe_per_me = 1;
   1583		adev->gfx.me.num_queue_per_pipe = 1;
   1584		adev->gfx.mec.num_mec = 2;
   1585		adev->gfx.mec.num_pipe_per_mec = 4;
   1586		adev->gfx.mec.num_queue_per_pipe = 4;
   1587		break;
   1588	default:
   1589		adev->gfx.me.num_me = 1;
   1590		adev->gfx.me.num_pipe_per_me = 1;
   1591		adev->gfx.me.num_queue_per_pipe = 1;
   1592		adev->gfx.mec.num_mec = 1;
   1593		adev->gfx.mec.num_pipe_per_mec = 4;
   1594		adev->gfx.mec.num_queue_per_pipe = 8;
   1595		break;
   1596	}
   1597
   1598	/* EOP Event */
   1599	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
   1600			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
   1601			      &adev->gfx.eop_irq);
   1602	if (r)
   1603		return r;
   1604
   1605	/* Privileged reg */
   1606	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
   1607			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
   1608			      &adev->gfx.priv_reg_irq);
   1609	if (r)
   1610		return r;
   1611
   1612	/* Privileged inst */
   1613	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
   1614			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
   1615			      &adev->gfx.priv_inst_irq);
   1616	if (r)
   1617		return r;
   1618
   1619	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
   1620
   1621	if (adev->gfx.imu.funcs) {
   1622		if (adev->gfx.imu.funcs->init_microcode) {
   1623			r = adev->gfx.imu.funcs->init_microcode(adev);
   1624			if (r)
   1625				DRM_ERROR("Failed to load imu firmware!\n");
   1626		}
   1627	}
   1628
   1629	r = gfx_v11_0_me_init(adev);
   1630	if (r)
   1631		return r;
   1632
   1633	r = gfx_v11_0_rlc_init(adev);
   1634	if (r) {
   1635		DRM_ERROR("Failed to init rlc BOs!\n");
   1636		return r;
   1637	}
   1638
   1639	r = gfx_v11_0_mec_init(adev);
   1640	if (r) {
   1641		DRM_ERROR("Failed to init MEC BOs!\n");
   1642		return r;
   1643	}
   1644
   1645	/* set up the gfx ring */
   1646	for (i = 0; i < adev->gfx.me.num_me; i++) {
   1647		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
   1648			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
   1649				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
   1650					continue;
   1651
   1652				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
   1653							    i, k, j);
   1654				if (r)
   1655					return r;
   1656				ring_id++;
   1657			}
   1658		}
   1659	}
   1660
   1661	ring_id = 0;
   1662	/* set up the compute queues - allocate horizontally across pipes */
   1663	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
   1664		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
   1665			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
   1666				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
   1667								     j))
   1668					continue;
   1669
   1670				r = gfx_v11_0_compute_ring_init(adev, ring_id,
   1671								i, k, j);
   1672				if (r)
   1673					return r;
   1674
   1675				ring_id++;
   1676			}
   1677		}
   1678	}
   1679
   1680	if (!adev->enable_mes_kiq) {
   1681		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
   1682		if (r) {
   1683			DRM_ERROR("Failed to init KIQ BOs!\n");
   1684			return r;
   1685		}
   1686
   1687		kiq = &adev->gfx.kiq;
   1688		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
   1689		if (r)
   1690			return r;
   1691	}
   1692
   1693	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
   1694	if (r)
   1695		return r;
   1696
   1697	/* allocate visible FB for rlc auto-loading fw */
   1698	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
   1699		r = gfx_v11_0_init_toc_microcode(adev);
   1700		if (r)
   1701			dev_err(adev->dev, "Failed to load toc firmware!\n");
   1702		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
   1703		if (r)
   1704			return r;
   1705	}
   1706
   1707	r = gfx_v11_0_gpu_early_init(adev);
   1708	if (r)
   1709		return r;
   1710
   1711	return 0;
   1712}
   1713
   1714static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
   1715{
   1716	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
   1717			      &adev->gfx.pfp.pfp_fw_gpu_addr,
   1718			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
   1719
   1720	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
   1721			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
   1722			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
   1723}
   1724
   1725static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
   1726{
   1727	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
   1728			      &adev->gfx.me.me_fw_gpu_addr,
   1729			      (void **)&adev->gfx.me.me_fw_ptr);
   1730
   1731	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
   1732			       &adev->gfx.me.me_fw_data_gpu_addr,
   1733			       (void **)&adev->gfx.me.me_fw_data_ptr);
   1734}
   1735
   1736static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
   1737{
   1738	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
   1739			&adev->gfx.rlc.rlc_autoload_gpu_addr,
   1740			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
   1741}
   1742
   1743static int gfx_v11_0_sw_fini(void *handle)
   1744{
   1745	int i;
   1746	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1747
   1748	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   1749		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
   1750	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   1751		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
   1752
   1753	amdgpu_gfx_mqd_sw_fini(adev);
   1754
   1755	if (!adev->enable_mes_kiq) {
   1756		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
   1757		amdgpu_gfx_kiq_fini(adev);
   1758	}
   1759
   1760	gfx_v11_0_pfp_fini(adev);
   1761	gfx_v11_0_me_fini(adev);
   1762	gfx_v11_0_rlc_fini(adev);
   1763	gfx_v11_0_mec_fini(adev);
   1764
   1765	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
   1766		gfx_v11_0_rlc_autoload_buffer_fini(adev);
   1767
   1768	gfx_v11_0_free_microcode(adev);
   1769
   1770	return 0;
   1771}
   1772
   1773static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
   1774				   u32 sh_num, u32 instance)
   1775{
   1776	u32 data;
   1777
   1778	if (instance == 0xffffffff)
   1779		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
   1780				     INSTANCE_BROADCAST_WRITES, 1);
   1781	else
   1782		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
   1783				     instance);
   1784
   1785	if (se_num == 0xffffffff)
   1786		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
   1787				     1);
   1788	else
   1789		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
   1790
   1791	if (sh_num == 0xffffffff)
   1792		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
   1793				     1);
   1794	else
   1795		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
   1796
   1797	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
   1798}
   1799
   1800static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
   1801{
   1802	u32 data, mask;
   1803
   1804	data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
   1805	data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
   1806
   1807	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
   1808	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
   1809
   1810	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
   1811					 adev->gfx.config.max_sh_per_se);
   1812
   1813	return (~data) & mask;
   1814}
   1815
   1816static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
   1817{
   1818	int i, j;
   1819	u32 data;
   1820	u32 active_rbs = 0;
   1821	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
   1822					adev->gfx.config.max_sh_per_se;
   1823
   1824	mutex_lock(&adev->grbm_idx_mutex);
   1825	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   1826		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   1827			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
   1828			data = gfx_v11_0_get_rb_active_bitmap(adev);
   1829			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
   1830					       rb_bitmap_width_per_sh);
   1831		}
   1832	}
   1833	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1834	mutex_unlock(&adev->grbm_idx_mutex);
   1835
   1836	adev->gfx.config.backend_enable_mask = active_rbs;
   1837	adev->gfx.config.num_rbs = hweight32(active_rbs);
   1838}
   1839
   1840#define DEFAULT_SH_MEM_BASES	(0x6000)
   1841#define LDS_APP_BASE           0x1
   1842#define SCRATCH_APP_BASE       0x2
   1843
   1844static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
   1845{
   1846	int i;
   1847	uint32_t sh_mem_bases;
   1848	uint32_t data;
   1849
   1850	/*
   1851	 * Configure apertures:
   1852	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
   1853	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
   1854	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
   1855	 */
   1856	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
   1857			SCRATCH_APP_BASE;
   1858
   1859	mutex_lock(&adev->srbm_mutex);
   1860	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   1861		soc21_grbm_select(adev, 0, 0, 0, i);
   1862		/* CP and shaders */
   1863		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
   1864		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
   1865
   1866		/* Enable trap for each kfd vmid. */
   1867		data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL));
    1868		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
		WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data);
   1869	}
   1870	soc21_grbm_select(adev, 0, 0, 0, 0);
   1871	mutex_unlock(&adev->srbm_mutex);
   1872
   1873	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
    1874	   access. These should be enabled by FW for target VMIDs. */
   1875	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   1876		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
   1877		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
   1878		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
   1879		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
   1880	}
   1881}
   1882
   1883static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
   1884{
   1885	int vmid;
   1886
   1887	/*
   1888	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
   1889	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
   1890	 * the driver can enable them for graphics. VMID0 should maintain
   1891	 * access so that HWS firmware can save/restore entries.
   1892	 */
   1893	for (vmid = 1; vmid < 16; vmid++) {
   1894		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
   1895		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
   1896		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
   1897		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
   1898	}
   1899}
   1900
   1901static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
   1902{
   1903	/* TODO: harvest feature to be added later. */
   1904}
   1905
   1906static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
   1907{
   1908	/* TCCs are global (not instanced). */
   1909	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
   1910			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
   1911
   1912	adev->gfx.config.tcc_disabled_mask =
   1913		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
   1914		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
   1915}
   1916
   1917static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
   1918{
   1919	u32 tmp;
   1920	int i;
   1921
   1922	WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
   1923
   1924	gfx_v11_0_setup_rb(adev);
   1925	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
   1926	gfx_v11_0_get_tcc_info(adev);
   1927	adev->gfx.config.pa_sc_tile_steering_override = 0;
   1928
   1929	/* XXX SH_MEM regs */
   1930	/* where to put LDS, scratch, GPUVM in FSA64 space */
   1931	mutex_lock(&adev->srbm_mutex);
   1932	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
   1933		soc21_grbm_select(adev, 0, 0, 0, i);
   1934		/* CP and shaders */
   1935		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
   1936		if (i != 0) {
   1937			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
   1938				(adev->gmc.private_aperture_start >> 48));
   1939			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
   1940				(adev->gmc.shared_aperture_start >> 48));
   1941			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
   1942		}
   1943	}
   1944	soc21_grbm_select(adev, 0, 0, 0, 0);
   1945
   1946	mutex_unlock(&adev->srbm_mutex);
   1947
   1948	gfx_v11_0_init_compute_vmid(adev);
   1949	gfx_v11_0_init_gds_vmid(adev);
   1950}
   1951
   1952static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
   1953					       bool enable)
   1954{
   1955	u32 tmp;
   1956
   1957	if (amdgpu_sriov_vf(adev))
   1958		return;
   1959
   1960	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
   1961
   1962	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
   1963			    enable ? 1 : 0);
   1964	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
   1965			    enable ? 1 : 0);
   1966	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
   1967			    enable ? 1 : 0);
   1968	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
   1969			    enable ? 1 : 0);
   1970
   1971	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
   1972}
   1973
   1974static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
   1975{
   1976	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
   1977
   1978	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
   1979			adev->gfx.rlc.clear_state_gpu_addr >> 32);
   1980	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
   1981			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
   1982	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
   1983
   1984	return 0;
   1985}
   1986
   1987static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
   1988{
   1989	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
   1990
   1991	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
   1992	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
   1993}
   1994
   1995static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
   1996{
   1997	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
   1998	udelay(50);
   1999	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
   2000	udelay(50);
   2001}
   2002
   2003static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
   2004					     bool enable)
   2005{
   2006	uint32_t rlc_pg_cntl;
   2007
   2008	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
   2009
   2010	if (!enable) {
   2011		/* RLC_PG_CNTL[23] = 0 (default)
   2012		 * RLC will wait for handshake acks with SMU
   2013		 * GFXOFF will be enabled
   2014		 * RLC_PG_CNTL[23] = 1
   2015		 * RLC will not issue any message to SMU
   2016		 * hence no handshake between SMU & RLC
   2017		 * GFXOFF will be disabled
   2018		 */
   2019		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
   2020	} else
   2021		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
   2022	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
   2023}
   2024
   2025static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
   2026{
    2027	/* TODO: re-enable the rlc & smu handshake once the smu
    2028	 * and gfxoff features work as expected */
   2029	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
   2030		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
   2031
   2032	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
   2033	udelay(50);
   2034}
   2035
   2036static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
   2037{
   2038	uint32_t tmp;
   2039
   2040	/* enable Save Restore Machine */
   2041	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
   2042	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
   2043	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
   2044	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
   2045}
   2046
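/*
 * Legacy (non-PSP) load of the RLC_G ucode: stream the image one dword
 * at a time through RLC_GPM_UCODE_ADDR/DATA, then write the firmware
 * version back into the address register.
 */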
   2047static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
   2048{
   2049	const struct rlc_firmware_header_v2_0 *hdr;
   2050	const __le32 *fw_data;
   2051	unsigned i, fw_size;
   2052
   2053	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
   2054	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   2055			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
   2056	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
   2057
   2058	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
   2059		     RLCG_UCODE_LOADING_START_ADDRESS);
   2060
   2061	for (i = 0; i < fw_size; i++)
   2062		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
   2063			     le32_to_cpup(fw_data++));
   2064
   2065	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
   2066}
   2067
   2068static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
   2069{
   2070	const struct rlc_firmware_header_v2_2 *hdr;
   2071	const __le32 *fw_data;
   2072	unsigned i, fw_size;
   2073	u32 tmp;
   2074
   2075	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
   2076
   2077	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   2078			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
   2079	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
   2080
   2081	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
   2082
   2083	for (i = 0; i < fw_size; i++) {
   2084		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
   2085			msleep(1);
   2086		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
   2087				le32_to_cpup(fw_data++));
   2088	}
   2089
   2090	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
   2091
   2092	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   2093			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
   2094	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
   2095
   2096	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
   2097	for (i = 0; i < fw_size; i++) {
   2098		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
   2099			msleep(1);
   2100		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
   2101				le32_to_cpup(fw_data++));
   2102	}
   2103
   2104	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
   2105
   2106	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
   2107	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
   2108	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
   2109	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
   2110}
   2111
   2112static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
   2113{
   2114	const struct rlc_firmware_header_v2_3 *hdr;
   2115	const __le32 *fw_data;
   2116	unsigned i, fw_size;
   2117	u32 tmp;
   2118
   2119	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
   2120
   2121	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   2122			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
   2123	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
   2124
   2125	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
   2126
   2127	for (i = 0; i < fw_size; i++) {
   2128		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
   2129			msleep(1);
   2130		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
   2131				le32_to_cpup(fw_data++));
   2132	}
   2133
   2134	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
   2135
   2136	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
   2137	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
   2138	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
   2139
   2140	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   2141			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
   2142	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
   2143
   2144	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
   2145
   2146	for (i = 0; i < fw_size; i++) {
   2147		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
   2148			msleep(1);
   2149		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
   2150				le32_to_cpup(fw_data++));
   2151	}
   2152
   2153	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
   2154
   2155	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
   2156	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
   2157	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
   2158}
   2159
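/*
 * Direct RLC firmware load: only v2 headers are supported. The RLC_G
 * image is always loaded; when dpm is enabled, headers >= v2.2 also
 * provide the LX6 IRAM/DRAM images and v2.3 adds the RLCP/RLCV images.
 */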
   2160static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
   2161{
   2162	const struct rlc_firmware_header_v2_0 *hdr;
   2163	uint16_t version_major;
   2164	uint16_t version_minor;
   2165
   2166	if (!adev->gfx.rlc_fw)
   2167		return -EINVAL;
   2168
   2169	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
   2170	amdgpu_ucode_print_rlc_hdr(&hdr->header);
   2171
   2172	version_major = le16_to_cpu(hdr->header.header_version_major);
   2173	version_minor = le16_to_cpu(hdr->header.header_version_minor);
   2174
   2175	if (version_major == 2) {
   2176		gfx_v11_0_load_rlcg_microcode(adev);
   2177		if (amdgpu_dpm == 1) {
   2178			if (version_minor >= 2)
   2179				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
   2180			if (version_minor == 3)
   2181				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
   2182		}
    2183
   2184		return 0;
   2185	}
   2186
   2187	return -EINVAL;
   2188}
   2189
   2190static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
   2191{
   2192	int r;
   2193
   2194	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
   2195		gfx_v11_0_init_csb(adev);
   2196
   2197		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
   2198			gfx_v11_0_rlc_enable_srm(adev);
   2199	} else {
   2200		if (amdgpu_sriov_vf(adev)) {
   2201			gfx_v11_0_init_csb(adev);
   2202			return 0;
   2203		}
   2204
   2205		adev->gfx.rlc.funcs->stop(adev);
   2206
   2207		/* disable CG */
   2208		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
   2209
   2210		/* disable PG */
   2211		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
   2212
   2213		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
   2214			/* legacy rlc firmware loading */
   2215			r = gfx_v11_0_rlc_load_microcode(adev);
   2216			if (r)
   2217				return r;
   2218		}
   2219
   2220		gfx_v11_0_init_csb(adev);
   2221
   2222		adev->gfx.rlc.funcs->start(adev);
   2223	}
   2224	return 0;
   2225}
   2226
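/*
 * Legacy ME instruction-cache setup: invalidate the L1 I-cache, wait for
 * completion, then program the 4KB-aligned ucode GPU address into the
 * CP_ME_IC_BASE registers.
 */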
   2227static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
   2228{
   2229	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2230	uint32_t tmp;
   2231	int i;
   2232
   2233	/* Trigger an invalidation of the L1 instruction caches */
   2234	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   2235	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
   2236	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
   2237
   2238	/* Wait for invalidation complete */
   2239	for (i = 0; i < usec_timeout; i++) {
   2240		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   2241		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
   2242					INVALIDATE_CACHE_COMPLETE))
   2243			break;
   2244		udelay(1);
   2245	}
   2246
   2247	if (i >= usec_timeout) {
   2248		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2249		return -EINVAL;
   2250	}
   2251
   2252	if (amdgpu_emu_mode == 1)
   2253		adev->hdp.funcs->flush_hdp(adev, NULL);
   2254
   2255	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
   2256	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
   2257	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
   2258	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
   2259	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
   2260	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
   2261
    2262	/* Program me ucode address into instruction cache address register */
   2263	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
   2264			lower_32_bits(addr) & 0xFFFFF000);
   2265	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
   2266			upper_32_bits(addr));
   2267
   2268	return 0;
   2269}
   2270
   2271static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
   2272{
   2273	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2274	uint32_t tmp;
   2275	int i;
   2276
   2277	/* Trigger an invalidation of the L1 instruction caches */
   2278	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2279	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
   2280	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
   2281
   2282	/* Wait for invalidation complete */
   2283	for (i = 0; i < usec_timeout; i++) {
   2284		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2285		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
   2286					INVALIDATE_CACHE_COMPLETE))
   2287			break;
   2288		udelay(1);
   2289	}
   2290
   2291	if (i >= usec_timeout) {
   2292		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2293		return -EINVAL;
   2294	}
   2295
   2296	if (amdgpu_emu_mode == 1)
   2297		adev->hdp.funcs->flush_hdp(adev, NULL);
   2298
   2299	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
   2300	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
   2301	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
   2302	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
   2303	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
   2304	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
   2305
    2306	/* Program pfp ucode address into instruction cache address register */
   2307	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
   2308			lower_32_bits(addr) & 0xFFFFF000);
   2309	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
   2310			upper_32_bits(addr));
   2311
   2312	return 0;
   2313}
   2314
   2315static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
   2316{
   2317	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2318	uint32_t tmp;
   2319	int i;
   2320
   2321	/* Trigger an invalidation of the L1 instruction caches */
   2322	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
   2323	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
   2324
   2325	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
   2326
   2327	/* Wait for invalidation complete */
   2328	for (i = 0; i < usec_timeout; i++) {
   2329		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
   2330		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
   2331					INVALIDATE_CACHE_COMPLETE))
   2332			break;
   2333		udelay(1);
   2334	}
   2335
   2336	if (i >= usec_timeout) {
   2337		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2338		return -EINVAL;
   2339	}
   2340
   2341	if (amdgpu_emu_mode == 1)
   2342		adev->hdp.funcs->flush_hdp(adev, NULL);
   2343
   2344	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
   2345	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
   2346	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
   2347	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
   2348	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
   2349
    2350	/* Program mec1 ucode address into instruction cache address register */
   2351	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
   2352			lower_32_bits(addr) & 0xFFFFF000);
   2353	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
   2354			upper_32_bits(addr));
   2355
   2356	return 0;
   2357}
   2358
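/*
 * RS64 PFP cache setup: program the instruction-cache base (addr) and
 * prime it, set the per-pipe data-cache base (addr2) and program-counter
 * start from the firmware header while pulsing the pipe reset bits, then
 * invalidate the RS64 data cache.
 */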
   2359static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
   2360{
   2361	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2362	uint32_t tmp;
   2363	unsigned i, pipe_id;
   2364	const struct gfx_firmware_header_v2_0 *pfp_hdr;
   2365
   2366	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
   2367		adev->gfx.pfp_fw->data;
   2368
   2369	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
   2370		lower_32_bits(addr));
   2371	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
   2372		upper_32_bits(addr));
   2373
   2374	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
   2375	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
   2376	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
   2377	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
   2378	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
   2379
    2380	/*
    2381	 * Programming any of the CP_PFP_IC_BASE registers
    2382	 * forces invalidation of the PFP L1 I$. Wait for the
    2383	 * invalidation to complete.
    2384	 */
   2385	for (i = 0; i < usec_timeout; i++) {
   2386		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2387		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
   2388			INVALIDATE_CACHE_COMPLETE))
   2389			break;
   2390		udelay(1);
   2391	}
   2392
   2393	if (i >= usec_timeout) {
   2394		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2395		return -EINVAL;
   2396	}
   2397
   2398	/* Prime the L1 instruction caches */
   2399	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2400	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
   2401	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
    2402	/* Wait for the instruction cache to be primed */
   2403	for (i = 0; i < usec_timeout; i++) {
   2404		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2405		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
   2406			ICACHE_PRIMED))
   2407			break;
   2408		udelay(1);
   2409	}
   2410
   2411	if (i >= usec_timeout) {
   2412		dev_err(adev->dev, "failed to prime instruction cache\n");
   2413		return -EINVAL;
   2414	}
   2415
   2416	mutex_lock(&adev->srbm_mutex);
   2417	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
   2418		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
   2419		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
   2420			(pfp_hdr->ucode_start_addr_hi << 30) |
   2421			(pfp_hdr->ucode_start_addr_lo >> 2));
   2422		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
   2423			pfp_hdr->ucode_start_addr_hi >> 2);
   2424
    2425		/*
    2426		 * Program CP_ME_CNTL to reset the given pipe so that
    2427		 * CP_PFP_PRGRM_CNTR_START takes effect.
    2428		 */
   2429		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   2430		if (pipe_id == 0)
   2431			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2432					PFP_PIPE0_RESET, 1);
   2433		else
   2434			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2435					PFP_PIPE1_RESET, 1);
   2436		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2437
    2438		/* Clear the pfp pipe reset bit. */
   2439		if (pipe_id == 0)
   2440			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2441					PFP_PIPE0_RESET, 0);
   2442		else
   2443			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2444					PFP_PIPE1_RESET, 0);
   2445		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2446
   2447		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
   2448			lower_32_bits(addr2));
   2449		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
   2450			upper_32_bits(addr2));
   2451	}
   2452	soc21_grbm_select(adev, 0, 0, 0, 0);
   2453	mutex_unlock(&adev->srbm_mutex);
   2454
   2455	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
   2456	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
   2457	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
   2458	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
   2459
   2460	/* Invalidate the data caches */
   2461	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   2462	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
   2463	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
   2464
   2465	for (i = 0; i < usec_timeout; i++) {
   2466		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   2467		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
   2468			INVALIDATE_DCACHE_COMPLETE))
   2469			break;
   2470		udelay(1);
   2471	}
   2472
   2473	if (i >= usec_timeout) {
   2474		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
   2475		return -EINVAL;
   2476	}
   2477
   2478	return 0;
   2479}
   2480
   2481static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
   2482{
   2483	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2484	uint32_t tmp;
   2485	unsigned i, pipe_id;
   2486	const struct gfx_firmware_header_v2_0 *me_hdr;
   2487
   2488	me_hdr = (const struct gfx_firmware_header_v2_0 *)
   2489		adev->gfx.me_fw->data;
   2490
   2491	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
   2492		lower_32_bits(addr));
   2493	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
   2494		upper_32_bits(addr));
   2495
   2496	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
   2497	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
   2498	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
   2499	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
   2500	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
   2501
   2502	/*
   2503	 * Programming any of the CP_ME_IC_BASE registers
   2504	 * forces invalidation of the ME L1 I$. Wait for the
    2505	 * invalidation to complete.
   2506	 */
   2507	for (i = 0; i < usec_timeout; i++) {
   2508		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   2509		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
   2510			INVALIDATE_CACHE_COMPLETE))
   2511			break;
   2512		udelay(1);
   2513	}
   2514
   2515	if (i >= usec_timeout) {
   2516		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2517		return -EINVAL;
   2518	}
   2519
   2520	/* Prime the instruction caches */
   2521	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   2522	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
   2523	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
   2524
    2525	/* Wait for the instruction cache to be primed */
   2526	for (i = 0; i < usec_timeout; i++) {
   2527		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   2528		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
   2529			ICACHE_PRIMED))
   2530			break;
   2531		udelay(1);
   2532	}
   2533
   2534	if (i >= usec_timeout) {
   2535		dev_err(adev->dev, "failed to prime instruction cache\n");
   2536		return -EINVAL;
   2537	}
   2538
   2539	mutex_lock(&adev->srbm_mutex);
   2540	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
   2541		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
   2542		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
   2543			(me_hdr->ucode_start_addr_hi << 30) |
   2544			(me_hdr->ucode_start_addr_lo >> 2) );
   2545		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
   2546			me_hdr->ucode_start_addr_hi>>2);
   2547
    2548		/*
    2549		 * Program CP_ME_CNTL to reset the given pipe so that
    2550		 * CP_ME_PRGRM_CNTR_START takes effect.
    2551		 */
   2552		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   2553		if (pipe_id == 0)
   2554			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2555					ME_PIPE0_RESET, 1);
   2556		else
   2557			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2558					ME_PIPE1_RESET, 1);
   2559		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2560
    2561		/* Clear the me pipe reset bit. */
   2562		if (pipe_id == 0)
   2563			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2564					ME_PIPE0_RESET, 0);
   2565		else
   2566			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   2567					ME_PIPE1_RESET, 0);
   2568		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2569
   2570		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
   2571			lower_32_bits(addr2));
   2572		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
   2573			upper_32_bits(addr2));
   2574	}
   2575	soc21_grbm_select(adev, 0, 0, 0, 0);
   2576	mutex_unlock(&adev->srbm_mutex);
   2577
   2578	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
   2579	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
   2580	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
   2581	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
   2582
   2583	/* Invalidate the data caches */
   2584	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   2585	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
   2586	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
   2587
   2588	for (i = 0; i < usec_timeout; i++) {
   2589		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   2590		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
   2591			INVALIDATE_DCACHE_COMPLETE))
   2592			break;
   2593		udelay(1);
   2594	}
   2595
   2596	if (i >= usec_timeout) {
   2597		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
   2598		return -EINVAL;
   2599	}
   2600
   2601	return 0;
   2602}
   2603
   2604static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
   2605{
   2606	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2607	uint32_t tmp;
   2608	unsigned i;
   2609	const struct gfx_firmware_header_v2_0 *mec_hdr;
   2610
   2611	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
   2612		adev->gfx.mec_fw->data;
   2613
   2614	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
   2615	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
   2616	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
   2617	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
   2618	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
   2619
   2620	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
   2621	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
   2622	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
   2623	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
   2624
   2625	mutex_lock(&adev->srbm_mutex);
   2626	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
   2627		soc21_grbm_select(adev, 1, i, 0, 0);
   2628
   2629		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
   2630		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
   2631		     upper_32_bits(addr2));
   2632
   2633		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
   2634					mec_hdr->ucode_start_addr_lo >> 2 |
   2635					mec_hdr->ucode_start_addr_hi << 30);
   2636		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
   2637					mec_hdr->ucode_start_addr_hi >> 2);
   2638
   2639		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
   2640		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
   2641		     upper_32_bits(addr));
   2642	}
    2643	soc21_grbm_select(adev, 0, 0, 0, 0);
    2644	mutex_unlock(&adev->srbm_mutex);
   2645
    2646	/* Invalidate the data caches */
   2647	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
   2648	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
   2649	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
   2650
   2651	/* Wait for invalidation complete */
   2652	for (i = 0; i < usec_timeout; i++) {
   2653		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
   2654		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
   2655				       INVALIDATE_DCACHE_COMPLETE))
   2656			break;
   2657		udelay(1);
   2658	}
   2659
   2660	if (i >= usec_timeout) {
    2661		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
   2662		return -EINVAL;
   2663	}
   2664
   2665	/* Trigger an invalidation of the L1 instruction caches */
   2666	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
   2667	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
   2668	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
   2669
   2670	/* Wait for invalidation complete */
   2671	for (i = 0; i < usec_timeout; i++) {
   2672		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
   2673		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
   2674				       INVALIDATE_CACHE_COMPLETE))
   2675			break;
   2676		udelay(1);
   2677	}
   2678
   2679	if (i >= usec_timeout) {
   2680		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2681		return -EINVAL;
   2682	}
   2683
   2684	return 0;
   2685}
   2686
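/*
 * Program the RS64 program-counter start addresses for every PFP, ME and
 * MEC pipe from the firmware headers, pulsing the PFP and ME pipe reset
 * bits in CP_ME_CNTL so the new start addresses take effect.
 */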
   2687static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
   2688{
   2689	const struct gfx_firmware_header_v2_0 *pfp_hdr;
   2690	const struct gfx_firmware_header_v2_0 *me_hdr;
   2691	const struct gfx_firmware_header_v2_0 *mec_hdr;
   2692	uint32_t pipe_id, tmp;
   2693
   2694	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
   2695		adev->gfx.mec_fw->data;
   2696	me_hdr = (const struct gfx_firmware_header_v2_0 *)
   2697		adev->gfx.me_fw->data;
   2698	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
   2699		adev->gfx.pfp_fw->data;
   2700
   2701	/* config pfp program start addr */
   2702	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
   2703		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
   2704		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
   2705			(pfp_hdr->ucode_start_addr_hi << 30) |
   2706			(pfp_hdr->ucode_start_addr_lo >> 2));
   2707		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
   2708			pfp_hdr->ucode_start_addr_hi >> 2);
   2709	}
   2710	soc21_grbm_select(adev, 0, 0, 0, 0);
   2711
   2712	/* reset pfp pipe */
   2713	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   2714	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
   2715	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
   2716	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2717
   2718	/* clear pfp pipe reset */
   2719	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
   2720	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
   2721	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2722
   2723	/* config me program start addr */
   2724	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
   2725		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
   2726		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
   2727			(me_hdr->ucode_start_addr_hi << 30) |
   2728			(me_hdr->ucode_start_addr_lo >> 2) );
   2729		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
   2730			me_hdr->ucode_start_addr_hi>>2);
   2731	}
   2732	soc21_grbm_select(adev, 0, 0, 0, 0);
   2733
   2734	/* reset me pipe */
   2735	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   2736	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
   2737	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
   2738	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2739
   2740	/* clear me pipe reset */
   2741	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
   2742	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
   2743	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2744
   2745	/* config mec program start addr */
   2746	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
   2747		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
   2748		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
   2749					mec_hdr->ucode_start_addr_lo >> 2 |
   2750					mec_hdr->ucode_start_addr_hi << 30);
   2751		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
   2752					mec_hdr->ucode_start_addr_hi >> 2);
   2753	}
   2754	soc21_grbm_select(adev, 0, 0, 0, 0);
   2755}
   2756
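/*
 * Wait for the RLC to report BOOTLOAD_COMPLETE (and CP_STAT to go idle),
 * then point the CP instruction/data caches at the firmware images that
 * were staged in the autoload buffer (RS64 or legacy layout as
 * appropriate).
 */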
   2757static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
   2758{
   2759	uint32_t cp_status;
   2760	uint32_t bootload_status;
   2761	int i, r;
   2762	uint64_t addr, addr2;
   2763
   2764	for (i = 0; i < adev->usec_timeout; i++) {
   2765		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
   2766		bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
   2767		if ((cp_status == 0) &&
   2768		    (REG_GET_FIELD(bootload_status,
   2769			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
   2770			break;
   2771		}
   2772		udelay(1);
   2773	}
   2774
   2775	if (i >= adev->usec_timeout) {
   2776		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
   2777		return -ETIMEDOUT;
   2778	}
   2779
   2780	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
   2781		if (adev->gfx.rs64_enable) {
   2782			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2783				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
   2784			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2785				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
   2786			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
   2787			if (r)
   2788				return r;
   2789			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2790				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
   2791			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2792				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
   2793			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
   2794			if (r)
   2795				return r;
   2796			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2797				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
   2798			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2799				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
   2800			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
   2801			if (r)
   2802				return r;
   2803		} else {
   2804			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2805				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
   2806			r = gfx_v11_0_config_me_cache(adev, addr);
   2807			if (r)
   2808				return r;
   2809			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2810				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
   2811			r = gfx_v11_0_config_pfp_cache(adev, addr);
   2812			if (r)
   2813				return r;
   2814			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
   2815				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
   2816			r = gfx_v11_0_config_mec_cache(adev, addr);
   2817			if (r)
   2818				return r;
   2819		}
   2820	}
   2821
   2822	return 0;
   2823}
   2824
   2825static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
   2826{
   2827	int i;
   2828	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   2829
   2830	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
   2831	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
   2832	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   2833
   2834	for (i = 0; i < adev->usec_timeout; i++) {
   2835		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
   2836			break;
   2837		udelay(1);
   2838	}
   2839
   2840	if (i >= adev->usec_timeout)
   2841		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
   2842
   2843	return 0;
   2844}
   2845
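/*
 * Legacy PFP load: copy the ucode image into a GTT buffer, point the PFP
 * instruction cache at it, then write the jump table through
 * CP_HYP_PFP_UCODE_ADDR/DATA.
 */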
   2846static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
   2847{
   2848	int r;
   2849	const struct gfx_firmware_header_v1_0 *pfp_hdr;
   2850	const __le32 *fw_data;
   2851	unsigned i, fw_size;
   2852
   2853	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
   2854		adev->gfx.pfp_fw->data;
   2855
   2856	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
   2857
   2858	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
   2859		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
   2860	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
   2861
   2862	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
   2863				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
   2864				      &adev->gfx.pfp.pfp_fw_obj,
   2865				      &adev->gfx.pfp.pfp_fw_gpu_addr,
   2866				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
   2867	if (r) {
   2868		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
   2869		gfx_v11_0_pfp_fini(adev);
   2870		return r;
   2871	}
   2872
   2873	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
   2874
   2875	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
   2876	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
   2877
   2878	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
   2879
   2880	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
   2881
   2882	for (i = 0; i < pfp_hdr->jt_size; i++)
   2883		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
   2884			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
   2885
   2886	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
   2887
   2888	return 0;
   2889}
   2890
   2891static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
   2892{
   2893	int r;
   2894	const struct gfx_firmware_header_v2_0 *pfp_hdr;
   2895	const __le32 *fw_ucode, *fw_data;
   2896	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
   2897	uint32_t tmp;
   2898	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   2899
   2900	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
   2901		adev->gfx.pfp_fw->data;
   2902
   2903	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
   2904
   2905	/* instruction */
   2906	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
   2907		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
   2908	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
   2909	/* data */
   2910	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
   2911		le32_to_cpu(pfp_hdr->data_offset_bytes));
   2912	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
   2913
   2914	/* 64kb align */
   2915	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
   2916				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
   2917				      &adev->gfx.pfp.pfp_fw_obj,
   2918				      &adev->gfx.pfp.pfp_fw_gpu_addr,
   2919				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
   2920	if (r) {
   2921		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
   2922		gfx_v11_0_pfp_fini(adev);
   2923		return r;
   2924	}
   2925
   2926	r = amdgpu_bo_create_reserved(adev, fw_data_size,
   2927				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
   2928				      &adev->gfx.pfp.pfp_fw_data_obj,
   2929				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
   2930				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
   2931	if (r) {
   2932		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
   2933		gfx_v11_0_pfp_fini(adev);
   2934		return r;
   2935	}
   2936
   2937	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
   2938	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
   2939
   2940	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
   2941	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
   2942	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
   2943	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
   2944
   2945	if (amdgpu_emu_mode == 1)
   2946		adev->hdp.funcs->flush_hdp(adev, NULL);
   2947
   2948	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
   2949		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
   2950	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
   2951		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
   2952
   2953	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
   2954	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
   2955	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
   2956	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
   2957	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
   2958
    2959	/*
    2960	 * Programming any of the CP_PFP_IC_BASE registers
    2961	 * forces invalidation of the PFP L1 I$. Wait for the
    2962	 * invalidation to complete.
    2963	 */
   2964	for (i = 0; i < usec_timeout; i++) {
   2965		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2966		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
   2967			INVALIDATE_CACHE_COMPLETE))
   2968			break;
   2969		udelay(1);
   2970	}
   2971
   2972	if (i >= usec_timeout) {
   2973		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   2974		return -EINVAL;
   2975	}
   2976
   2977	/* Prime the L1 instruction caches */
   2978	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2979	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
   2980	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
    2981	/* Wait for the cache to be primed */
   2982	for (i = 0; i < usec_timeout; i++) {
   2983		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
   2984		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
   2985			ICACHE_PRIMED))
   2986			break;
   2987		udelay(1);
   2988	}
   2989
   2990	if (i >= usec_timeout) {
   2991		dev_err(adev->dev, "failed to prime instruction cache\n");
   2992		return -EINVAL;
   2993	}
   2994
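	/*
	 * Per-pipe setup: program the PFP start program counter and the data
	 * cache base for each pipe, pulsing the pipe reset bit in CP_ME_CNTL
	 * so the new start address takes effect.
	 */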
   2995	mutex_lock(&adev->srbm_mutex);
   2996	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
   2997		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
   2998		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
   2999			(pfp_hdr->ucode_start_addr_hi << 30) |
    3000			(pfp_hdr->ucode_start_addr_lo >> 2));
    3001		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
    3002			pfp_hdr->ucode_start_addr_hi >> 2);
   3003
   3004		/*
    3005		 * Program CP_ME_CNTL to reset the given pipe so that
    3006		 * CP_PFP_PRGRM_CNTR_START takes effect.
   3007		 */
   3008		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   3009		if (pipe_id == 0)
   3010			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3011					PFP_PIPE0_RESET, 1);
   3012		else
   3013			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3014					PFP_PIPE1_RESET, 1);
   3015		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   3016
    3017		/* Clear the pfp pipe reset bit. */
   3018		if (pipe_id == 0)
   3019			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3020					PFP_PIPE0_RESET, 0);
   3021		else
   3022			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3023					PFP_PIPE1_RESET, 0);
   3024		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   3025
   3026		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
   3027			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
   3028		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
   3029			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
   3030	}
   3031	soc21_grbm_select(adev, 0, 0, 0, 0);
   3032	mutex_unlock(&adev->srbm_mutex);
   3033
   3034	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
   3035	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
   3036	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
   3037	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
   3038
   3039	/* Invalidate the data caches */
   3040	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   3041	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
   3042	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
   3043
   3044	for (i = 0; i < usec_timeout; i++) {
   3045		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   3046		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
   3047			INVALIDATE_DCACHE_COMPLETE))
   3048			break;
   3049		udelay(1);
   3050	}
   3051
   3052	if (i >= usec_timeout) {
   3053		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
   3054		return -EINVAL;
   3055	}
   3056
   3057	return 0;
   3058}
   3059
   3060static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
   3061{
   3062	int r;
   3063	const struct gfx_firmware_header_v1_0 *me_hdr;
   3064	const __le32 *fw_data;
   3065	unsigned i, fw_size;
   3066
   3067	me_hdr = (const struct gfx_firmware_header_v1_0 *)
   3068		adev->gfx.me_fw->data;
   3069
   3070	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
   3071
   3072	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
   3073		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
   3074	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
   3075
   3076	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
   3077				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
   3078				      &adev->gfx.me.me_fw_obj,
   3079				      &adev->gfx.me.me_fw_gpu_addr,
   3080				      (void **)&adev->gfx.me.me_fw_ptr);
   3081	if (r) {
   3082		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
   3083		gfx_v11_0_me_fini(adev);
   3084		return r;
   3085	}
   3086
   3087	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
   3088
   3089	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
   3090	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
   3091
   3092	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
   3093
   3094	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
   3095
   3096	for (i = 0; i < me_hdr->jt_size; i++)
   3097		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
   3098			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
   3099
   3100	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
   3101
   3102	return 0;
   3103}
   3104
   3105static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
   3106{
   3107	int r;
   3108	const struct gfx_firmware_header_v2_0 *me_hdr;
   3109	const __le32 *fw_ucode, *fw_data;
   3110	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
   3111	uint32_t tmp;
   3112	uint32_t usec_timeout = 50000;  /* wait for 50ms */
   3113
   3114	me_hdr = (const struct gfx_firmware_header_v2_0 *)
   3115		adev->gfx.me_fw->data;
   3116
   3117	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
   3118
   3119	/* instruction */
   3120	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
   3121		le32_to_cpu(me_hdr->ucode_offset_bytes));
   3122	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
   3123	/* data */
   3124	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
   3125		le32_to_cpu(me_hdr->data_offset_bytes));
   3126	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
   3127
    3128	/* 64kb align */
   3129	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
   3130				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
   3131				      &adev->gfx.me.me_fw_obj,
   3132				      &adev->gfx.me.me_fw_gpu_addr,
   3133				      (void **)&adev->gfx.me.me_fw_ptr);
   3134	if (r) {
   3135		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
   3136		gfx_v11_0_me_fini(adev);
   3137		return r;
   3138	}
   3139
   3140	r = amdgpu_bo_create_reserved(adev, fw_data_size,
   3141				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
   3142				      &adev->gfx.me.me_fw_data_obj,
   3143				      &adev->gfx.me.me_fw_data_gpu_addr,
   3144				      (void **)&adev->gfx.me.me_fw_data_ptr);
   3145	if (r) {
   3146		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
    3147		gfx_v11_0_me_fini(adev);
   3148		return r;
   3149	}
   3150
   3151	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
   3152	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
   3153
   3154	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
   3155	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
   3156	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
   3157	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
   3158
   3159	if (amdgpu_emu_mode == 1)
   3160		adev->hdp.funcs->flush_hdp(adev, NULL);
   3161
   3162	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
   3163		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
   3164	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
   3165		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
   3166
   3167	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
   3168	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
   3169	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
   3170	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
   3171	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
   3172
   3173	/*
   3174	 * Programming any of the CP_ME_IC_BASE registers
   3175	 * forces invalidation of the ME L1 I$. Wait for the
    3176	 * invalidation to complete.
   3177	 */
   3178	for (i = 0; i < usec_timeout; i++) {
   3179		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   3180		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
   3181			INVALIDATE_CACHE_COMPLETE))
   3182			break;
   3183		udelay(1);
   3184	}
   3185
   3186	if (i >= usec_timeout) {
   3187		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   3188		return -EINVAL;
   3189	}
   3190
   3191	/* Prime the instruction caches */
   3192	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   3193	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
   3194	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
   3195
    3196	/* Wait for the instruction cache to be primed */
   3197	for (i = 0; i < usec_timeout; i++) {
   3198		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
   3199		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
   3200			ICACHE_PRIMED))
   3201			break;
   3202		udelay(1);
   3203	}
   3204
   3205	if (i >= usec_timeout) {
   3206		dev_err(adev->dev, "failed to prime instruction cache\n");
   3207		return -EINVAL;
   3208	}
   3209
   3210	mutex_lock(&adev->srbm_mutex);
   3211	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
   3212		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
   3213		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
   3214			(me_hdr->ucode_start_addr_hi << 30) |
    3215			(me_hdr->ucode_start_addr_lo >> 2));
    3216		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
    3217			me_hdr->ucode_start_addr_hi >> 2);
   3218
   3219		/*
    3220		 * Program CP_ME_CNTL to reset the given pipe so that
    3221		 * CP_ME_PRGRM_CNTR_START takes effect.
   3222		 */
   3223		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
   3224		if (pipe_id == 0)
   3225			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3226					ME_PIPE0_RESET, 1);
   3227		else
   3228			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3229					ME_PIPE1_RESET, 1);
   3230		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   3231
    3232		/* Clear the me pipe reset bit. */
   3233		if (pipe_id == 0)
   3234			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3235					ME_PIPE0_RESET, 0);
   3236		else
   3237			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
   3238					ME_PIPE1_RESET, 0);
   3239		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
   3240
   3241		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
   3242			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
   3243		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
   3244			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
   3245	}
   3246	soc21_grbm_select(adev, 0, 0, 0, 0);
   3247	mutex_unlock(&adev->srbm_mutex);
   3248
   3249	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
   3250	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
   3251	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
   3252	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
   3253
   3254	/* Invalidate the data caches */
   3255	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   3256	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
   3257	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
   3258
   3259	for (i = 0; i < usec_timeout; i++) {
   3260		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
   3261		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
   3262			INVALIDATE_DCACHE_COMPLETE))
   3263			break;
   3264		udelay(1);
   3265	}
   3266
   3267	if (i >= usec_timeout) {
   3268		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
   3269		return -EINVAL;
   3270	}
   3271
   3272	return 0;
   3273}
   3274
   3275static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
   3276{
   3277	int r;
   3278
   3279	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
   3280		return -EINVAL;
   3281
   3282	gfx_v11_0_cp_gfx_enable(adev, false);
   3283
   3284	if (adev->gfx.rs64_enable)
   3285		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
   3286	else
   3287		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
   3288	if (r) {
   3289		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
   3290		return r;
   3291	}
   3292
   3293	if (adev->gfx.rs64_enable)
   3294		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
   3295	else
   3296		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
   3297	if (r) {
   3298		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
   3299		return r;
   3300	}
   3301
   3302	return 0;
   3303}
   3304
   3305static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
   3306{
   3307	struct amdgpu_ring *ring;
   3308	const struct cs_section_def *sect = NULL;
   3309	const struct cs_extent_def *ext = NULL;
   3310	int r, i;
   3311	int ctx_reg_offset;
   3312
   3313	/* init the CP */
   3314	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
   3315		     adev->gfx.config.max_hw_contexts - 1);
   3316	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
   3317
   3318	if (!amdgpu_async_gfx_ring)
   3319		gfx_v11_0_cp_gfx_enable(adev, true);
   3320
   3321	ring = &adev->gfx.gfx_ring[0];
   3322	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
   3323	if (r) {
   3324		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
   3325		return r;
   3326	}
   3327
   3328	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   3329	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   3330
   3331	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   3332	amdgpu_ring_write(ring, 0x80000000);
   3333	amdgpu_ring_write(ring, 0x80000000);
   3334
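	/* emit the clear state register values from gfx11_cs_data, bracketed
	 * by the clear state preamble packets above and below */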
   3335	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
   3336		for (ext = sect->section; ext->extent != NULL; ++ext) {
   3337			if (sect->id == SECT_CONTEXT) {
   3338				amdgpu_ring_write(ring,
   3339						  PACKET3(PACKET3_SET_CONTEXT_REG,
   3340							  ext->reg_count));
   3341				amdgpu_ring_write(ring, ext->reg_index -
   3342						  PACKET3_SET_CONTEXT_REG_START);
   3343				for (i = 0; i < ext->reg_count; i++)
   3344					amdgpu_ring_write(ring, ext->extent[i]);
   3345			}
   3346		}
   3347	}
   3348
   3349	ctx_reg_offset =
   3350		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
   3351	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
   3352	amdgpu_ring_write(ring, ctx_reg_offset);
   3353	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
   3354
   3355	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   3356	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
   3357
   3358	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
   3359	amdgpu_ring_write(ring, 0);
   3360
   3361	amdgpu_ring_commit(ring);
   3362
   3363	/* submit cs packet to copy state 0 to next available state */
   3364	if (adev->gfx.num_gfx_rings > 1) {
    3365		/* a maximum of 2 gfx rings is supported */
   3366		ring = &adev->gfx.gfx_ring[1];
   3367		r = amdgpu_ring_alloc(ring, 2);
   3368		if (r) {
   3369			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
   3370			return r;
   3371		}
   3372
   3373		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
   3374		amdgpu_ring_write(ring, 0);
   3375
   3376		amdgpu_ring_commit(ring);
   3377	}
   3378	return 0;
   3379}
   3380
   3381static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
   3382					 CP_PIPE_ID pipe)
   3383{
   3384	u32 tmp;
   3385
   3386	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
   3387	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
   3388
   3389	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
   3390}
   3391
   3392static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
   3393					  struct amdgpu_ring *ring)
   3394{
   3395	u32 tmp;
   3396
   3397	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
   3398	if (ring->use_doorbell) {
   3399		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3400				    DOORBELL_OFFSET, ring->doorbell_index);
   3401		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3402				    DOORBELL_EN, 1);
   3403	} else {
   3404		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3405				    DOORBELL_EN, 0);
   3406	}
   3407	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
   3408
   3409	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
   3410			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
   3411	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
   3412
   3413	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
   3414		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
   3415}
   3416
   3417static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
   3418{
   3419	struct amdgpu_ring *ring;
   3420	u32 tmp;
   3421	u32 rb_bufsz;
   3422	u64 rb_addr, rptr_addr, wptr_gpu_addr;
   3423	u32 i;
   3424
   3425	/* Set the write pointer delay */
   3426	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
   3427
   3428	/* set the RB to use vmid 0 */
   3429	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
   3430
   3431	/* Init gfx ring 0 for pipe 0 */
   3432	mutex_lock(&adev->srbm_mutex);
   3433	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
   3434
   3435	/* Set ring buffer size */
   3436	ring = &adev->gfx.gfx_ring[0];
   3437	rb_bufsz = order_base_2(ring->ring_size / 8);
   3438	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
   3439	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
   3440	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
   3441
   3442	/* Initialize the ring buffer's write pointers */
   3443	ring->wptr = 0;
   3444	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
   3445	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
   3446
    3447	/* set the wb address whether it's enabled or not */
   3448	rptr_addr = ring->rptr_gpu_addr;
   3449	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
   3450	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
   3451		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
   3452
   3453	wptr_gpu_addr = ring->wptr_gpu_addr;
   3454	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
   3455		     lower_32_bits(wptr_gpu_addr));
   3456	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
   3457		     upper_32_bits(wptr_gpu_addr));
   3458
   3459	mdelay(1);
   3460	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
   3461
   3462	rb_addr = ring->gpu_addr >> 8;
   3463	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
   3464	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
   3465
   3466	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
   3467
   3468	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
   3469	mutex_unlock(&adev->srbm_mutex);
   3470
   3471	/* Init gfx ring 1 for pipe 1 */
   3472	if (adev->gfx.num_gfx_rings > 1) {
   3473		mutex_lock(&adev->srbm_mutex);
   3474		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
    3475		/* a maximum of 2 gfx rings is supported */
   3476		ring = &adev->gfx.gfx_ring[1];
   3477		rb_bufsz = order_base_2(ring->ring_size / 8);
   3478		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
   3479		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
   3480		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
   3481		/* Initialize the ring buffer's write pointers */
   3482		ring->wptr = 0;
   3483		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
   3484		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
    3485		/* Set the wb address whether it's enabled or not */
   3486		rptr_addr = ring->rptr_gpu_addr;
   3487		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
   3488		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
   3489			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
   3490		wptr_gpu_addr = ring->wptr_gpu_addr;
   3491		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
   3492			     lower_32_bits(wptr_gpu_addr));
   3493		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
   3494			     upper_32_bits(wptr_gpu_addr));
   3495
   3496		mdelay(1);
   3497		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
   3498
   3499		rb_addr = ring->gpu_addr >> 8;
   3500		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
   3501		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
   3502		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
   3503
   3504		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
   3505		mutex_unlock(&adev->srbm_mutex);
   3506	}
   3507	/* Switch to pipe 0 */
   3508	mutex_lock(&adev->srbm_mutex);
   3509	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
   3510	mutex_unlock(&adev->srbm_mutex);
   3511
   3512	/* start the ring */
   3513	gfx_v11_0_cp_gfx_start(adev);
   3514
   3515	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   3516		ring = &adev->gfx.gfx_ring[i];
   3517		ring->sched.ready = true;
   3518	}
   3519
   3520	return 0;
   3521}
   3522
   3523static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
   3524{
   3525	u32 data;
   3526
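	/*
	 * RS64 MECs expose per-pipe reset/active bits plus MEC_HALT; the
	 * legacy path only toggles the ME1/ME2 halt bits (ME2 stays halted
	 * when the MES KIQ owns it).
	 */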
   3527	if (adev->gfx.rs64_enable) {
   3528		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
   3529		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
   3530							 enable ? 0 : 1);
   3531		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
   3532							 enable ? 0 : 1);
   3533		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
   3534							 enable ? 0 : 1);
   3535		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
   3536							 enable ? 0 : 1);
   3537		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
   3538							 enable ? 0 : 1);
   3539		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
   3540							 enable ? 1 : 0);
   3541		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
   3542				                         enable ? 1 : 0);
   3543		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
   3544							 enable ? 1 : 0);
   3545		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
   3546							 enable ? 1 : 0);
   3547		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
   3548							 enable ? 0 : 1);
   3549		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
   3550	} else {
   3551		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
   3552
   3553		if (enable) {
   3554			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
   3555			if (!adev->enable_mes_kiq)
   3556				data = REG_SET_FIELD(data, CP_MEC_CNTL,
   3557						     MEC_ME2_HALT, 0);
   3558		} else {
   3559			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
   3560			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
   3561		}
   3562		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
   3563	}
   3564
   3565	adev->gfx.kiq.ring.sched.ready = enable;
   3566
   3567	udelay(50);
   3568}
   3569
   3570static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
   3571{
   3572	const struct gfx_firmware_header_v1_0 *mec_hdr;
   3573	const __le32 *fw_data;
   3574	unsigned i, fw_size;
   3575	u32 *fw = NULL;
   3576	int r;
   3577
   3578	if (!adev->gfx.mec_fw)
   3579		return -EINVAL;
   3580
   3581	gfx_v11_0_cp_compute_enable(adev, false);
   3582
   3583	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   3584	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
   3585
   3586	fw_data = (const __le32 *)
   3587		(adev->gfx.mec_fw->data +
   3588		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
   3589	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
   3590
   3591	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
   3592					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
   3593					  &adev->gfx.mec.mec_fw_obj,
   3594					  &adev->gfx.mec.mec_fw_gpu_addr,
   3595					  (void **)&fw);
   3596	if (r) {
   3597		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
   3598		gfx_v11_0_mec_fini(adev);
   3599		return r;
   3600	}
   3601
   3602	memcpy(fw, fw_data, fw_size);
    3603
   3604	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
   3605	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
   3606
   3607	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
   3608
   3609	/* MEC1 */
   3610	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
   3611
   3612	for (i = 0; i < mec_hdr->jt_size; i++)
   3613		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
   3614			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
   3615
   3616	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
   3617
   3618	return 0;
   3619}
   3620
   3621static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
   3622{
   3623	const struct gfx_firmware_header_v2_0 *mec_hdr;
   3624	const __le32 *fw_ucode, *fw_data;
   3625	u32 tmp, fw_ucode_size, fw_data_size;
   3626	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
   3627	u32 *fw_ucode_ptr, *fw_data_ptr;
   3628	int r;
   3629
   3630	if (!adev->gfx.mec_fw)
   3631		return -EINVAL;
   3632
   3633	gfx_v11_0_cp_compute_enable(adev, false);
   3634
   3635	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
   3636	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
   3637
   3638	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
   3639				le32_to_cpu(mec_hdr->ucode_offset_bytes));
   3640	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
   3641
   3642	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
   3643				le32_to_cpu(mec_hdr->data_offset_bytes));
   3644	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
   3645
   3646	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
   3647				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
   3648				      &adev->gfx.mec.mec_fw_obj,
   3649				      &adev->gfx.mec.mec_fw_gpu_addr,
   3650				      (void **)&fw_ucode_ptr);
   3651	if (r) {
   3652		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
   3653		gfx_v11_0_mec_fini(adev);
   3654		return r;
   3655	}
   3656
   3657	r = amdgpu_bo_create_reserved(adev, fw_data_size,
   3658				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
   3659				      &adev->gfx.mec.mec_fw_data_obj,
   3660				      &adev->gfx.mec.mec_fw_data_gpu_addr,
   3661				      (void **)&fw_data_ptr);
   3662	if (r) {
    3663		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
   3664		gfx_v11_0_mec_fini(adev);
   3665		return r;
   3666	}
   3667
   3668	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
   3669	memcpy(fw_data_ptr, fw_data, fw_data_size);
   3670
   3671	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
   3672	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
   3673	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
   3674	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
   3675
   3676	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
   3677	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
   3678	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
   3679	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
   3680	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
   3681
   3682	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
   3683	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
   3684	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
   3685	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
   3686
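	/*
	 * Program the MEC data/instruction cache bases and the start program
	 * counter for every compute pipe (MEC1 selected via GRBM).
	 */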
   3687	mutex_lock(&adev->srbm_mutex);
   3688	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
   3689		soc21_grbm_select(adev, 1, i, 0, 0);
   3690
   3691		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
   3692		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
   3693		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
   3694
   3695		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
   3696					mec_hdr->ucode_start_addr_lo >> 2 |
   3697					mec_hdr->ucode_start_addr_hi << 30);
   3698		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
   3699					mec_hdr->ucode_start_addr_hi >> 2);
   3700
   3701		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
   3702		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
   3703		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
   3704	}
    3705	soc21_grbm_select(adev, 0, 0, 0, 0);
    3706	mutex_unlock(&adev->srbm_mutex);
   3707
    3708	/* Invalidate the data caches */
   3709	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
   3710	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
   3711	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
   3712
   3713	/* Wait for invalidation complete */
   3714	for (i = 0; i < usec_timeout; i++) {
   3715		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
   3716		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
   3717				       INVALIDATE_DCACHE_COMPLETE))
   3718			break;
   3719		udelay(1);
   3720	}
   3721
   3722	if (i >= usec_timeout) {
    3723		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
   3724		return -EINVAL;
   3725	}
   3726
   3727	/* Trigger an invalidation of the L1 instruction caches */
   3728	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
   3729	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
   3730	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
   3731
   3732	/* Wait for invalidation complete */
   3733	for (i = 0; i < usec_timeout; i++) {
   3734		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
   3735		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
   3736				       INVALIDATE_CACHE_COMPLETE))
   3737			break;
   3738		udelay(1);
   3739	}
   3740
   3741	if (i >= usec_timeout) {
   3742		dev_err(adev->dev, "failed to invalidate instruction cache\n");
   3743		return -EINVAL;
   3744	}
   3745
   3746	return 0;
   3747}
   3748
   3749static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
   3750{
   3751	uint32_t tmp;
   3752	struct amdgpu_device *adev = ring->adev;
   3753
   3754	/* tell RLC which is KIQ queue */
   3755	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
   3756	tmp &= 0xffffff00;
   3757	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
   3758	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
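	/* the second write below additionally sets bit 7, which appears to
	 * mark the selected queue as the active KIQ (assumption based on the
	 * two-step programming sequence) */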
   3759	tmp |= 0x80;
   3760	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
   3761}
   3762
   3763static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
   3764{
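	/*
	 * Doorbell indices are allocated in 64-bit doorbell units; the
	 * "* 2 << 2" converts them to the byte-offset form these range
	 * registers take (based on how doorbell indices are used elsewhere
	 * in amdgpu).
	 */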
   3765	/* set graphics engine doorbell range */
   3766	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
   3767		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
   3768	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
   3769		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
   3770
   3771	/* set compute engine doorbell range */
   3772	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
   3773		     (adev->doorbell_index.kiq * 2) << 2);
   3774	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
   3775		     (adev->doorbell_index.userqueue_end * 2) << 2);
   3776}
   3777
   3778static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
   3779				  struct amdgpu_mqd_prop *prop)
   3780{
   3781	struct v11_gfx_mqd *mqd = m;
   3782	uint64_t hqd_gpu_addr, wb_gpu_addr;
   3783	uint32_t tmp;
   3784	uint32_t rb_bufsz;
   3785
   3786	/* set up gfx hqd wptr */
   3787	mqd->cp_gfx_hqd_wptr = 0;
   3788	mqd->cp_gfx_hqd_wptr_hi = 0;
   3789
   3790	/* set the pointer to the MQD */
   3791	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
   3792	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
   3793
   3794	/* set up mqd control */
   3795	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
   3796	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
   3797	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
   3798	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
   3799	mqd->cp_gfx_mqd_control = tmp;
   3800
    3801	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
   3802	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
   3803	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
   3804	mqd->cp_gfx_hqd_vmid = 0;
   3805
   3806	/* set up default queue priority level
   3807	 * 0x0 = low priority, 0x1 = high priority */
   3808	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
   3809	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
   3810	mqd->cp_gfx_hqd_queue_priority = tmp;
   3811
   3812	/* set up time quantum */
   3813	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
   3814	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
   3815	mqd->cp_gfx_hqd_quantum = tmp;
   3816
    3817	/* set up gfx hqd base. this is similar to CP_RB_BASE */
   3818	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
   3819	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
   3820	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
   3821
    3822	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
   3823	wb_gpu_addr = prop->rptr_gpu_addr;
   3824	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
   3825	mqd->cp_gfx_hqd_rptr_addr_hi =
   3826		upper_32_bits(wb_gpu_addr) & 0xffff;
   3827
   3828	/* set up rb_wptr_poll addr */
   3829	wb_gpu_addr = prop->wptr_gpu_addr;
   3830	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
   3831	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   3832
    3833	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
   3834	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
   3835	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
   3836	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
   3837	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
   3838#ifdef __BIG_ENDIAN
   3839	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
   3840#endif
   3841	mqd->cp_gfx_hqd_cntl = tmp;
   3842
   3843	/* set up cp_doorbell_control */
   3844	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
   3845	if (prop->use_doorbell) {
   3846		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3847				    DOORBELL_OFFSET, prop->doorbell_index);
   3848		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3849				    DOORBELL_EN, 1);
   3850	} else
   3851		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3852				    DOORBELL_EN, 0);
   3853	mqd->cp_rb_doorbell_control = tmp;
   3854
   3855	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   3856	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
   3857
    3858	/* activate the queue */
   3859	mqd->cp_gfx_hqd_active = 1;
   3860
   3861	return 0;
   3862}
   3863
   3864#ifdef BRING_UP_DEBUG
   3865static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
   3866{
   3867	struct amdgpu_device *adev = ring->adev;
   3868	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
   3869
   3870	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
   3871	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
   3872	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
   3873
   3874	/* set GFX_MQD_BASE */
   3875	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
   3876	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
   3877
   3878	/* set GFX_MQD_CONTROL */
   3879	WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
   3880
   3881	/* set GFX_HQD_VMID to 0 */
   3882	WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
   3883
   3884	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
   3885			mqd->cp_gfx_hqd_queue_priority);
   3886	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
   3887
   3888	/* set GFX_HQD_BASE, similar as CP_RB_BASE */
   3889	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
   3890	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
   3891
   3892	/* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
   3893	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
   3894	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
   3895
   3896	/* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
   3897	WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
   3898
   3899	/* set RB_WPTR_POLL_ADDR */
   3900	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
   3901	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
   3902
   3903	/* set RB_DOORBELL_CONTROL */
   3904	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
   3905
    3906	/* activate the queue */
   3907	WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
   3908
   3909	return 0;
   3910}
   3911#endif
   3912
   3913static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
   3914{
   3915	struct amdgpu_device *adev = ring->adev;
   3916	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
   3917	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
   3918
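	/*
	 * Three cases: first-time init builds a fresh MQD and backs it up,
	 * GPU reset restores the MQD from the backup and resets the ring
	 * pointers, and resume from suspend only clears the ring.
	 */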
   3919	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
   3920		memset((void *)mqd, 0, sizeof(*mqd));
   3921		mutex_lock(&adev->srbm_mutex);
   3922		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   3923		amdgpu_ring_init_mqd(ring);
   3924#ifdef BRING_UP_DEBUG
   3925		gfx_v11_0_gfx_queue_init_register(ring);
   3926#endif
   3927		soc21_grbm_select(adev, 0, 0, 0, 0);
   3928		mutex_unlock(&adev->srbm_mutex);
   3929		if (adev->gfx.me.mqd_backup[mqd_idx])
   3930			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
   3931	} else if (amdgpu_in_reset(adev)) {
   3932		/* reset mqd with the backup copy */
   3933		if (adev->gfx.me.mqd_backup[mqd_idx])
   3934			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
   3935		/* reset the ring */
   3936		ring->wptr = 0;
   3937		*ring->wptr_cpu_addr = 0;
   3938		amdgpu_ring_clear_ring(ring);
   3939#ifdef BRING_UP_DEBUG
   3940		mutex_lock(&adev->srbm_mutex);
   3941		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   3942		gfx_v11_0_gfx_queue_init_register(ring);
   3943		soc21_grbm_select(adev, 0, 0, 0, 0);
   3944		mutex_unlock(&adev->srbm_mutex);
   3945#endif
   3946	} else {
   3947		amdgpu_ring_clear_ring(ring);
   3948	}
   3949
   3950	return 0;
   3951}
   3952
   3953#ifndef BRING_UP_DEBUG
   3954static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
   3955{
   3956	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
   3957	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
   3958	int r, i;
   3959
   3960	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
   3961		return -EINVAL;
   3962
   3963	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
   3964					adev->gfx.num_gfx_rings);
   3965	if (r) {
   3966		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
   3967		return r;
   3968	}
   3969
   3970	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   3971		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
   3972
   3973	return amdgpu_ring_test_helper(kiq_ring);
   3974}
   3975#endif
   3976
   3977static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
   3978{
   3979	int r, i;
   3980	struct amdgpu_ring *ring;
   3981
   3982	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   3983		ring = &adev->gfx.gfx_ring[i];
   3984
   3985		r = amdgpu_bo_reserve(ring->mqd_obj, false);
   3986		if (unlikely(r != 0))
   3987			goto done;
   3988
   3989		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
   3990		if (!r) {
   3991			r = gfx_v11_0_gfx_init_queue(ring);
   3992			amdgpu_bo_kunmap(ring->mqd_obj);
   3993			ring->mqd_ptr = NULL;
   3994		}
   3995		amdgpu_bo_unreserve(ring->mqd_obj);
   3996		if (r)
   3997			goto done;
   3998	}
   3999#ifndef BRING_UP_DEBUG
   4000	r = gfx_v11_0_kiq_enable_kgq(adev);
   4001	if (r)
   4002		goto done;
   4003#endif
   4004	r = gfx_v11_0_cp_gfx_start(adev);
   4005	if (r)
   4006		goto done;
   4007
   4008	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   4009		ring = &adev->gfx.gfx_ring[i];
   4010		ring->sched.ready = true;
   4011	}
   4012done:
   4013	return r;
   4014}
   4015
   4016static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
   4017				      struct amdgpu_mqd_prop *prop)
   4018{
   4019	struct v11_compute_mqd *mqd = m;
   4020	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
   4021	uint32_t tmp;
   4022
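	/* fill in the compute MQD; the values mirror what would otherwise be
	 * programmed directly into the CP_HQD_* registers */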
   4023	mqd->header = 0xC0310800;
   4024	mqd->compute_pipelinestat_enable = 0x00000001;
   4025	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
   4026	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
   4027	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
   4028	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
   4029	mqd->compute_misc_reserved = 0x00000007;
   4030
   4031	eop_base_addr = prop->eop_gpu_addr >> 8;
   4032	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
   4033	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
   4034
   4035	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   4036	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
   4037	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
   4038			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
   4039
   4040	mqd->cp_hqd_eop_control = tmp;
   4041
   4042	/* enable doorbell? */
   4043	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
   4044
   4045	if (prop->use_doorbell) {
   4046		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4047				    DOORBELL_OFFSET, prop->doorbell_index);
   4048		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4049				    DOORBELL_EN, 1);
   4050		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4051				    DOORBELL_SOURCE, 0);
   4052		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4053				    DOORBELL_HIT, 0);
   4054	} else {
   4055		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4056				    DOORBELL_EN, 0);
   4057	}
   4058
   4059	mqd->cp_hqd_pq_doorbell_control = tmp;
   4060
   4061	/* disable the queue if it's active */
   4062	mqd->cp_hqd_dequeue_request = 0;
   4063	mqd->cp_hqd_pq_rptr = 0;
   4064	mqd->cp_hqd_pq_wptr_lo = 0;
   4065	mqd->cp_hqd_pq_wptr_hi = 0;
   4066
   4067	/* set the pointer to the MQD */
   4068	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
   4069	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
   4070
   4071	/* set MQD vmid to 0 */
   4072	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
   4073	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
   4074	mqd->cp_mqd_control = tmp;
   4075
    4076	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
   4077	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
   4078	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
   4079	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
   4080
   4081	/* set up the HQD, this is similar to CP_RB0_CNTL */
   4082	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
   4083	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
   4084			    (order_base_2(prop->queue_size / 4) - 1));
   4085	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
   4086			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
   4087	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
   4088	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
   4089	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
   4090	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
   4091	mqd->cp_hqd_pq_control = tmp;
   4092
   4093	/* set the wb address whether it's enabled or not */
   4094	wb_gpu_addr = prop->rptr_gpu_addr;
   4095	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
   4096	mqd->cp_hqd_pq_rptr_report_addr_hi =
   4097		upper_32_bits(wb_gpu_addr) & 0xffff;
   4098
   4099	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
   4100	wb_gpu_addr = prop->wptr_gpu_addr;
   4101	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
   4102	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   4103
   4104	tmp = 0;
   4105	/* enable the doorbell if requested */
   4106	if (prop->use_doorbell) {
   4107		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
   4108		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4109				DOORBELL_OFFSET, prop->doorbell_index);
   4110
   4111		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4112				    DOORBELL_EN, 1);
   4113		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4114				    DOORBELL_SOURCE, 0);
   4115		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4116				    DOORBELL_HIT, 0);
   4117	}
   4118
   4119	mqd->cp_hqd_pq_doorbell_control = tmp;
   4120
   4121	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   4122	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
   4123
   4124	/* set the vmid for the queue */
   4125	mqd->cp_hqd_vmid = 0;
   4126
   4127	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
   4128	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
   4129	mqd->cp_hqd_persistent_state = tmp;
   4130
   4131	/* set MIN_IB_AVAIL_SIZE */
   4132	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
   4133	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
   4134	mqd->cp_hqd_ib_control = tmp;
   4135
   4136	/* set static priority for a compute queue/ring */
   4137	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
   4138	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
   4139
   4140	mqd->cp_hqd_active = prop->hqd_active;
   4141
   4142	return 0;
   4143}
   4144
   4145static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
   4146{
   4147	struct amdgpu_device *adev = ring->adev;
   4148	struct v11_compute_mqd *mqd = ring->mqd_ptr;
   4149	int j;
   4150
   4151	/* inactivate the queue */
   4152	if (amdgpu_sriov_vf(adev))
   4153		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
   4154
   4155	/* disable wptr polling */
   4156	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
   4157
   4158	/* write the EOP addr */
   4159	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
   4160	       mqd->cp_hqd_eop_base_addr_lo);
   4161	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
   4162	       mqd->cp_hqd_eop_base_addr_hi);
   4163
   4164	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   4165	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
   4166	       mqd->cp_hqd_eop_control);
   4167
   4168	/* enable doorbell? */
   4169	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
   4170	       mqd->cp_hqd_pq_doorbell_control);
   4171
   4172	/* disable the queue if it's active */
   4173	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
   4174		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
   4175		for (j = 0; j < adev->usec_timeout; j++) {
   4176			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
   4177				break;
   4178			udelay(1);
   4179		}
   4180		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
   4181		       mqd->cp_hqd_dequeue_request);
   4182		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
   4183		       mqd->cp_hqd_pq_rptr);
   4184		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
   4185		       mqd->cp_hqd_pq_wptr_lo);
   4186		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
   4187		       mqd->cp_hqd_pq_wptr_hi);
   4188	}
   4189
   4190	/* set the pointer to the MQD */
   4191	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
   4192	       mqd->cp_mqd_base_addr_lo);
   4193	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
   4194	       mqd->cp_mqd_base_addr_hi);
   4195
   4196	/* set MQD vmid to 0 */
   4197	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
   4198	       mqd->cp_mqd_control);
   4199
    4200	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
   4201	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
   4202	       mqd->cp_hqd_pq_base_lo);
   4203	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
   4204	       mqd->cp_hqd_pq_base_hi);
   4205
   4206	/* set up the HQD, this is similar to CP_RB0_CNTL */
   4207	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
   4208	       mqd->cp_hqd_pq_control);
   4209
   4210	/* set the wb address whether it's enabled or not */
   4211	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
   4212		mqd->cp_hqd_pq_rptr_report_addr_lo);
   4213	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
   4214		mqd->cp_hqd_pq_rptr_report_addr_hi);
   4215
   4216	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
   4217	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
   4218	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
   4219	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
   4220	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
   4221
   4222	/* enable the doorbell if requested */
   4223	if (ring->use_doorbell) {
   4224		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
   4225			(adev->doorbell_index.kiq * 2) << 2);
   4226		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
   4227			(adev->doorbell_index.userqueue_end * 2) << 2);
   4228	}
   4229
   4230	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
   4231	       mqd->cp_hqd_pq_doorbell_control);
   4232
   4233	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   4234	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
   4235	       mqd->cp_hqd_pq_wptr_lo);
   4236	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
   4237	       mqd->cp_hqd_pq_wptr_hi);
   4238
   4239	/* set the vmid for the queue */
   4240	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
   4241
   4242	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
   4243	       mqd->cp_hqd_persistent_state);
   4244
   4245	/* activate the queue */
   4246	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
   4247	       mqd->cp_hqd_active);
   4248
   4249	if (ring->use_doorbell)
   4250		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
   4251
   4252	return 0;
   4253}
   4254
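/*
 * The KIQ itself is brought up by direct register writes (see
 * gfx_v11_0_kiq_init_register above), since no queue exists yet that could
 * map it; regular compute queues are mapped later via the KIQ or MES.
 */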
   4255static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
   4256{
   4257	struct amdgpu_device *adev = ring->adev;
   4258	struct v11_compute_mqd *mqd = ring->mqd_ptr;
   4259	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
   4260
   4261	gfx_v11_0_kiq_setting(ring);
   4262
   4263	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
   4264		/* reset MQD to a clean status */
   4265		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4266			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
   4267
   4268		/* reset ring buffer */
   4269		ring->wptr = 0;
   4270		amdgpu_ring_clear_ring(ring);
   4271
   4272		mutex_lock(&adev->srbm_mutex);
   4273		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   4274		gfx_v11_0_kiq_init_register(ring);
   4275		soc21_grbm_select(adev, 0, 0, 0, 0);
   4276		mutex_unlock(&adev->srbm_mutex);
   4277	} else {
   4278		memset((void *)mqd, 0, sizeof(*mqd));
   4279		mutex_lock(&adev->srbm_mutex);
   4280		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   4281		amdgpu_ring_init_mqd(ring);
   4282		gfx_v11_0_kiq_init_register(ring);
   4283		soc21_grbm_select(adev, 0, 0, 0, 0);
   4284		mutex_unlock(&adev->srbm_mutex);
   4285
   4286		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4287			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
   4288	}
   4289
   4290	return 0;
   4291}
   4292
   4293static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
   4294{
   4295	struct amdgpu_device *adev = ring->adev;
   4296	struct v11_compute_mqd *mqd = ring->mqd_ptr;
   4297	int mqd_idx = ring - &adev->gfx.compute_ring[0];
   4298
   4299	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
   4300		memset((void *)mqd, 0, sizeof(*mqd));
   4301		mutex_lock(&adev->srbm_mutex);
   4302		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   4303		amdgpu_ring_init_mqd(ring);
   4304		soc21_grbm_select(adev, 0, 0, 0, 0);
   4305		mutex_unlock(&adev->srbm_mutex);
   4306
   4307		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4308			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
   4309	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
   4310		/* reset MQD to a clean status */
   4311		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4312			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
   4313
   4314		/* reset ring buffer */
   4315		ring->wptr = 0;
   4316		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
   4317		amdgpu_ring_clear_ring(ring);
   4318	} else {
   4319		amdgpu_ring_clear_ring(ring);
   4320	}
   4321
   4322	return 0;
   4323}
   4324
   4325static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
   4326{
   4327	struct amdgpu_ring *ring;
   4328	int r;
   4329
   4330	ring = &adev->gfx.kiq.ring;
   4331
   4332	r = amdgpu_bo_reserve(ring->mqd_obj, false);
   4333	if (unlikely(r != 0))
   4334		return r;
   4335
   4336	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
   4337	if (unlikely(r != 0)) {
   4338		amdgpu_bo_unreserve(ring->mqd_obj);
   4339		return r;
   4340	}
   4341
   4342	gfx_v11_0_kiq_init_queue(ring);
   4343	amdgpu_bo_kunmap(ring->mqd_obj);
   4344	ring->mqd_ptr = NULL;
   4345	amdgpu_bo_unreserve(ring->mqd_obj);
   4346	ring->sched.ready = true;
   4347	return 0;
   4348}
   4349
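        /*
         * Initialize the MQDs of all compute rings and map the queues to
         * the hardware via the KIQ (amdgpu_gfx_enable_kcq).
         */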
   4350static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
   4351{
   4352	struct amdgpu_ring *ring = NULL;
   4353	int r = 0, i;
   4354
   4355	if (!amdgpu_async_gfx_ring)
   4356		gfx_v11_0_cp_compute_enable(adev, true);
   4357
   4358	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4359		ring = &adev->gfx.compute_ring[i];
   4360
   4361		r = amdgpu_bo_reserve(ring->mqd_obj, false);
   4362		if (unlikely(r != 0))
   4363			goto done;
   4364		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
   4365		if (!r) {
   4366			r = gfx_v11_0_kcq_init_queue(ring);
   4367			amdgpu_bo_kunmap(ring->mqd_obj);
   4368			ring->mqd_ptr = NULL;
   4369		}
   4370		amdgpu_bo_unreserve(ring->mqd_obj);
   4371		if (r)
   4372			goto done;
   4373	}
   4374
   4375	r = amdgpu_gfx_enable_kcq(adev);
   4376done:
   4377	return r;
   4378}
   4379
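        /*
         * Bring up the command processor: load CP microcode when using the
         * legacy (direct) load path, resume the KIQ/KCQ and gfx rings, then
         * run a ring test on every gfx and compute ring.
         */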
   4380static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
   4381{
   4382	int r, i;
   4383	struct amdgpu_ring *ring;
   4384
   4385	if (!(adev->flags & AMD_IS_APU))
   4386		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
   4387
   4388	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
   4389		/* legacy firmware loading */
   4390		r = gfx_v11_0_cp_gfx_load_microcode(adev);
   4391		if (r)
   4392			return r;
   4393
   4394		if (adev->gfx.rs64_enable)
   4395			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
   4396		else
   4397			r = gfx_v11_0_cp_compute_load_microcode(adev);
   4398		if (r)
   4399			return r;
   4400	}
   4401
   4402	gfx_v11_0_cp_set_doorbell_range(adev);
   4403
   4404	if (amdgpu_async_gfx_ring) {
   4405		gfx_v11_0_cp_compute_enable(adev, true);
   4406		gfx_v11_0_cp_gfx_enable(adev, true);
   4407	}
   4408
   4409	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
   4410		r = amdgpu_mes_kiq_hw_init(adev);
   4411	else
   4412		r = gfx_v11_0_kiq_resume(adev);
   4413	if (r)
   4414		return r;
   4415
   4416	r = gfx_v11_0_kcq_resume(adev);
   4417	if (r)
   4418		return r;
   4419
   4420	if (!amdgpu_async_gfx_ring) {
   4421		r = gfx_v11_0_cp_gfx_resume(adev);
   4422		if (r)
   4423			return r;
   4424	} else {
   4425		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
   4426		if (r)
   4427			return r;
   4428	}
   4429
   4430	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   4431		ring = &adev->gfx.gfx_ring[i];
   4432		r = amdgpu_ring_test_helper(ring);
   4433		if (r)
   4434			return r;
   4435	}
   4436
   4437	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4438		ring = &adev->gfx.compute_ring[i];
   4439		r = amdgpu_ring_test_helper(ring);
   4440		if (r)
   4441			return r;
   4442	}
   4443
   4444	return 0;
   4445}
   4446
   4447static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
   4448{
   4449	gfx_v11_0_cp_gfx_enable(adev, enable);
   4450	gfx_v11_0_cp_compute_enable(adev, enable);
   4451}
   4452
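        /*
         * Enable GART on the gfxhub, pick the default VM fault behaviour
         * and flush HDP and the gfxhub TLB.
         */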
   4453static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
   4454{
   4455	int r;
   4456	bool value;
   4457
   4458	r = adev->gfxhub.funcs->gart_enable(adev);
   4459	if (r)
   4460		return r;
   4461
   4462	adev->hdp.funcs->flush_hdp(adev, NULL);
   4463
    4464	value = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
   4466
   4467	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
   4468	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
   4469
   4470	return 0;
   4471}
   4472
   4473static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
   4474{
   4475	u32 tmp;
   4476
   4477	/* select RS64 */
   4478	if (adev->gfx.rs64_enable) {
   4479		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
   4480		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
   4481		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
   4482
   4483		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
   4484		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
   4485		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
   4486	}
   4487
   4488	if (amdgpu_emu_mode == 1)
   4489		msleep(100);
   4490}
   4491
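        /* Read GB_ADDR_CONFIG and decode its fields into adev->gfx.config. */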
    4493static int get_gb_addr_config(struct amdgpu_device *adev)
   4493{
   4494	u32 gb_addr_config;
   4495
   4496	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
   4497	if (gb_addr_config == 0)
   4498		return -EINVAL;
   4499
   4500	adev->gfx.config.gb_addr_config_fields.num_pkrs =
   4501		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
   4502
   4503	adev->gfx.config.gb_addr_config = gb_addr_config;
   4504
   4505	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
   4506			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
   4507				      GB_ADDR_CONFIG, NUM_PIPES);
   4508
   4509	adev->gfx.config.max_tile_pipes =
   4510		adev->gfx.config.gb_addr_config_fields.num_pipes;
   4511
   4512	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
   4513			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
   4514				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
   4515	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
   4516			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
   4517				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
   4518	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
   4519			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
   4520				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
   4521	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
   4522			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
   4523				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
   4524
   4525	return 0;
   4526}
   4527
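        /* Set the GPA override bit in CPC/CPG_PSP_DEBUG to disable GPA mode. */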
   4528static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
   4529{
   4530	uint32_t data;
   4531
   4532	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
   4533	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
   4534	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
   4535
   4536	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
   4537	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
   4538	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
   4539}
   4540
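        /*
         * hw_init: load/program the RLC and IMU firmware according to the
         * configured load type, enable the gfxhub, apply golden registers
         * and constants, then resume the RLC and the command processor.
         */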
   4541static int gfx_v11_0_hw_init(void *handle)
   4542{
   4543	int r;
   4544	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4545
   4546	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
   4547		if (adev->gfx.imu.funcs) {
   4548			/* RLC autoload sequence 1: Program rlc ram */
   4549			if (adev->gfx.imu.funcs->program_rlc_ram)
   4550				adev->gfx.imu.funcs->program_rlc_ram(adev);
   4551		}
   4552		/* rlc autoload firmware */
   4553		r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
   4554		if (r)
   4555			return r;
   4556	} else {
   4557		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
   4558			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
   4559				if (adev->gfx.imu.funcs->load_microcode)
   4560					adev->gfx.imu.funcs->load_microcode(adev);
   4561				if (adev->gfx.imu.funcs->setup_imu)
   4562					adev->gfx.imu.funcs->setup_imu(adev);
   4563				if (adev->gfx.imu.funcs->start_imu)
   4564					adev->gfx.imu.funcs->start_imu(adev);
   4565			}
   4566		}
   4567	}
   4568
   4569	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
   4570	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
   4571		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
   4572		if (r) {
   4573			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
   4574			return r;
   4575		}
   4576	}
   4577
   4578	adev->gfx.is_poweron = true;
   4579
    4580	if (get_gb_addr_config(adev))
   4581		DRM_WARN("Invalid gb_addr_config !\n");
   4582
   4583	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
   4584	    adev->gfx.rs64_enable)
   4585		gfx_v11_0_config_gfx_rs64(adev);
   4586
   4587	r = gfx_v11_0_gfxhub_enable(adev);
   4588	if (r)
   4589		return r;
   4590
   4591	if (!amdgpu_emu_mode)
   4592		gfx_v11_0_init_golden_registers(adev);
   4593
   4594	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
   4595	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
    4596		/*
    4597		 * For gfx 11, rlc firmware loading relies on the smu firmware
    4598		 * being loaded first, so for the direct load type the smc ucode
    4599		 * has to be loaded here before the rlc.
    4600		 */
   4601		if (!(adev->flags & AMD_IS_APU)) {
   4602			r = amdgpu_pm_load_smu_firmware(adev, NULL);
   4603			if (r)
   4604				return r;
   4605		}
   4606	}
   4607
   4608	gfx_v11_0_constants_init(adev);
   4609
   4610	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
   4611		gfx_v11_0_select_cp_fw_arch(adev);
   4612
   4613	if (adev->nbio.funcs->gc_doorbell_init)
   4614		adev->nbio.funcs->gc_doorbell_init(adev);
   4615
   4616	r = gfx_v11_0_rlc_resume(adev);
   4617	if (r)
   4618		return r;
   4619
   4620	/*
    4621	 * golden register init and rlc resume may override some registers,
    4622	 * so reconfigure them here
   4623	 */
   4624	gfx_v11_0_tcp_harvest(adev);
   4625
   4626	r = gfx_v11_0_cp_resume(adev);
   4627	if (r)
   4628		return r;
   4629
   4630	return r;
   4631}
   4632
   4633#ifndef BRING_UP_DEBUG
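        /* Preempt and unmap all kernel gfx queues (KGQ) through the KIQ. */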
   4634static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
   4635{
   4636	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
   4637	struct amdgpu_ring *kiq_ring = &kiq->ring;
   4638	int i, r = 0;
   4639
   4640	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
   4641		return -EINVAL;
   4642
   4643	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
   4644					adev->gfx.num_gfx_rings))
   4645		return -ENOMEM;
   4646
   4647	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   4648		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
   4649					   PREEMPT_QUEUES, 0, 0);
   4650
   4651	if (adev->gfx.kiq.ring.sched.ready)
   4652		r = amdgpu_ring_test_helper(kiq_ring);
   4653
   4654	return r;
   4655}
   4656#endif
   4657
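        /*
         * hw_fini: disable the gfx/compute queues (through the KIQ where
         * possible) and tear down the MES KIQ. On bare metal the CP and the
         * gfxhub GART are disabled as well; SR-IOV VFs only stop the gfx CP
         * and reset the KIQ position in RLC_CP_SCHEDULERS.
         */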
   4658static int gfx_v11_0_hw_fini(void *handle)
   4659{
   4660	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4661	int r;
   4662	uint32_t tmp;
   4663
   4664	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
   4665	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
   4666
   4667	if (!adev->no_hw_access) {
   4668#ifndef BRING_UP_DEBUG
   4669		if (amdgpu_async_gfx_ring) {
   4670			r = gfx_v11_0_kiq_disable_kgq(adev);
   4671			if (r)
   4672				DRM_ERROR("KGQ disable failed\n");
   4673		}
   4674#endif
   4675		if (amdgpu_gfx_disable_kcq(adev))
   4676			DRM_ERROR("KCQ disable failed\n");
   4677
   4678		amdgpu_mes_kiq_hw_fini(adev);
   4679	}
   4680
   4681	if (amdgpu_sriov_vf(adev)) {
   4682		gfx_v11_0_cp_gfx_enable(adev, false);
   4683		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
   4684		tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
   4685		tmp &= 0xffffff00;
   4686		WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
   4687
   4688		return 0;
   4689	}
   4690	gfx_v11_0_cp_enable(adev, false);
   4691	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
   4692
   4693	adev->gfxhub.funcs->gart_disable(adev);
   4694
   4695	adev->gfx.is_poweron = false;
   4696
   4697	return 0;
   4698}
   4699
   4700static int gfx_v11_0_suspend(void *handle)
   4701{
   4702	return gfx_v11_0_hw_fini(handle);
   4703}
   4704
   4705static int gfx_v11_0_resume(void *handle)
   4706{
   4707	return gfx_v11_0_hw_init(handle);
   4708}
   4709
   4710static bool gfx_v11_0_is_idle(void *handle)
   4711{
   4712	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4713
   4714	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
   4715				GRBM_STATUS, GUI_ACTIVE))
   4716		return false;
   4717	else
   4718		return true;
   4719}
   4720
   4721static int gfx_v11_0_wait_for_idle(void *handle)
   4722{
   4723	unsigned i;
   4724	u32 tmp;
   4725	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4726
   4727	for (i = 0; i < adev->usec_timeout; i++) {
    4728	/* read GRBM_STATUS */
   4729		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
   4730			GRBM_STATUS__GUI_ACTIVE_MASK;
   4731
   4732		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
   4733			return 0;
   4734		udelay(1);
   4735	}
   4736	return -ETIMEDOUT;
   4737}
   4738
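        /*
         * Perform a GRBM soft reset of the CP, GFX and/or RLC blocks if
         * GRBM_STATUS/GRBM_STATUS2 report them busy; the RLC and CP are
         * stopped before the reset bits are pulsed.
         */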
   4739static int gfx_v11_0_soft_reset(void *handle)
   4740{
   4741	u32 grbm_soft_reset = 0;
   4742	u32 tmp;
   4743	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4744
   4745	/* GRBM_STATUS */
   4746	tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS);
   4747	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
   4748		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
   4749		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
   4750		   GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
   4751		   GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
   4752		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4753						GRBM_SOFT_RESET, SOFT_RESET_CP,
   4754						1);
   4755		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4756						GRBM_SOFT_RESET, SOFT_RESET_GFX,
   4757						1);
   4758	}
   4759
   4760	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
   4761		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4762						GRBM_SOFT_RESET, SOFT_RESET_CP,
   4763						1);
   4764	}
   4765
   4766	/* GRBM_STATUS2 */
   4767	tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2);
   4768	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
   4769		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4770						GRBM_SOFT_RESET,
   4771						SOFT_RESET_RLC,
   4772						1);
   4773
   4774	if (grbm_soft_reset) {
   4775		/* stop the rlc */
   4776		gfx_v11_0_rlc_stop(adev);
   4777
   4778		/* Disable GFX parsing/prefetching */
   4779		gfx_v11_0_cp_gfx_enable(adev, false);
   4780
   4781		/* Disable MEC parsing/prefetching */
   4782		gfx_v11_0_cp_compute_enable(adev, false);
   4783
   4784		if (grbm_soft_reset) {
   4785			tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
   4786			tmp |= grbm_soft_reset;
   4787			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
   4788			WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
   4789			tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
   4790
   4791			udelay(50);
   4792
   4793			tmp &= ~grbm_soft_reset;
   4794			WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
   4795			tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
   4796		}
   4797
   4798		/* Wait a little for things to settle down */
   4799		udelay(50);
   4800	}
   4801	return 0;
   4802}
   4803
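        /* Read the 64bit GOLDEN_TSC counter with GFXOFF temporarily disabled. */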
   4804static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
   4805{
   4806	uint64_t clock;
   4807
   4808	amdgpu_gfx_off_ctrl(adev, false);
   4809	mutex_lock(&adev->gfx.gpu_clock_mutex);
   4810	clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
   4811		((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
   4812	mutex_unlock(&adev->gfx.gpu_clock_mutex);
   4813	amdgpu_gfx_off_ctrl(adev, true);
   4814	return clock;
   4815}
   4816
   4817static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
   4818					   uint32_t vmid,
   4819					   uint32_t gds_base, uint32_t gds_size,
   4820					   uint32_t gws_base, uint32_t gws_size,
   4821					   uint32_t oa_base, uint32_t oa_size)
   4822{
   4823	struct amdgpu_device *adev = ring->adev;
   4824
   4825	/* GDS Base */
   4826	gfx_v11_0_write_data_to_reg(ring, 0, false,
   4827				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
   4828				    gds_base);
   4829
   4830	/* GDS Size */
   4831	gfx_v11_0_write_data_to_reg(ring, 0, false,
   4832				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
   4833				    gds_size);
   4834
   4835	/* GWS */
   4836	gfx_v11_0_write_data_to_reg(ring, 0, false,
   4837				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
   4838				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
   4839
   4840	/* OA */
   4841	gfx_v11_0_write_data_to_reg(ring, 0, false,
   4842				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
   4843				    (1 << (oa_size + oa_base)) - (1 << oa_base));
   4844}
   4845
   4846static int gfx_v11_0_early_init(void *handle)
   4847{
   4848	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4849
   4850	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
   4851	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
   4852					  AMDGPU_MAX_COMPUTE_RINGS);
   4853
   4854	gfx_v11_0_set_kiq_pm4_funcs(adev);
   4855	gfx_v11_0_set_ring_funcs(adev);
   4856	gfx_v11_0_set_irq_funcs(adev);
   4857	gfx_v11_0_set_gds_init(adev);
   4858	gfx_v11_0_set_rlc_funcs(adev);
   4859	gfx_v11_0_set_mqd_funcs(adev);
   4860	gfx_v11_0_set_imu_funcs(adev);
   4861
   4862	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
   4863
   4864	return 0;
   4865}
   4866
   4867static int gfx_v11_0_late_init(void *handle)
   4868{
   4869	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4870	int r;
   4871
   4872	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
   4873	if (r)
   4874		return r;
   4875
   4876	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
   4877	if (r)
   4878		return r;
   4879
   4880	return 0;
   4881}
   4882
   4883static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
   4884{
   4885	uint32_t rlc_cntl;
   4886
    4887	/* check RLC_CNTL to see whether the RLC is enabled */
   4888	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
   4889	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
   4890}
   4891
   4892static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
   4893{
   4894	uint32_t data;
   4895	unsigned i;
   4896
   4897	data = RLC_SAFE_MODE__CMD_MASK;
   4898	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
   4899
   4900	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
   4901
   4902	/* wait for RLC_SAFE_MODE */
   4903	for (i = 0; i < adev->usec_timeout; i++) {
   4904		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
   4905				   RLC_SAFE_MODE, CMD))
   4906			break;
   4907		udelay(1);
   4908	}
   4909}
   4910
   4911static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
   4912{
   4913	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
   4914}
   4915
   4916static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
   4917				      bool enable)
   4918{
   4919	uint32_t def, data;
   4920
   4921	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
   4922		return;
   4923
   4924	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   4925
   4926	if (enable)
   4927		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
   4928	else
   4929		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
   4930
   4931	if (def != data)
   4932		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
   4933}
   4934
   4935static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
   4936				       bool enable)
   4937{
   4938	uint32_t def, data;
   4939
   4940	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
   4941		return;
   4942
   4943	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   4944
   4945	if (enable)
   4946		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
   4947	else
   4948		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
   4949
   4950	if (def != data)
   4951		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
   4952}
   4953
   4954static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
   4955					   bool enable)
   4956{
   4957	uint32_t def, data;
   4958
   4959	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
   4960		return;
   4961
   4962	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   4963
   4964	if (enable)
   4965		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
   4966	else
   4967		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
   4968
   4969	if (def != data)
   4970		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
   4971}
   4972
   4973static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
   4974						       bool enable)
   4975{
   4976	uint32_t data, def;
   4977
   4978	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
   4979		return;
   4980
   4981	/* It is disabled by HW by default */
   4982	if (enable) {
   4983		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
   4984			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
   4985			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   4986
   4987			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
   4988				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
   4989				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
   4990
   4991			if (def != data)
   4992				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
   4993		}
   4994	} else {
   4995		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
   4996			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   4997
   4998			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
   4999				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
   5000				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
   5001
   5002			if (def != data)
   5003				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
   5004		}
   5005	}
   5006}
   5007
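        /*
         * Enable or disable coarse grain clock gating (CGCG/CGLS and the 3D
         * variants): adjust the MGCG override bits, program the CGCG/CGLS
         * FSMs and the idle poll count, and toggle the SDMA CGCG interrupts.
         */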
   5008static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
   5009						       bool enable)
   5010{
   5011	uint32_t def, data;
   5012
   5013	if (!(adev->cg_flags &
   5014	      (AMD_CG_SUPPORT_GFX_CGCG |
   5015	      AMD_CG_SUPPORT_GFX_CGLS |
   5016	      AMD_CG_SUPPORT_GFX_3D_CGCG |
   5017	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
   5018		return;
   5019
   5020	if (enable) {
   5021		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   5022
   5023		/* unset CGCG override */
   5024		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
   5025			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
   5026		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
   5027			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
   5028		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
   5029		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
   5030			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
   5031
   5032		/* update CGCG override bits */
   5033		if (def != data)
   5034			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
   5035
   5036		/* enable cgcg FSM(0x0000363F) */
   5037		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
   5038
   5039		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
   5040			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
   5041			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
   5042				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
   5043		}
   5044
   5045		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
   5046			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
   5047			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
   5048				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
   5049		}
   5050
   5051		if (def != data)
   5052			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
   5053
   5054		/* Program RLC_CGCG_CGLS_CTRL_3D */
   5055		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
   5056
   5057		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
   5058			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
   5059			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
   5060				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
   5061		}
   5062
   5063		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
   5064			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
   5065			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
   5066				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
   5067		}
   5068
   5069		if (def != data)
   5070			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
   5071
   5072		/* set IDLE_POLL_COUNT(0x00900100) */
   5073		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
   5074
   5075		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
   5076		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
   5077			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
   5078
   5079		if (def != data)
   5080			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
   5081
   5082		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
   5083		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
   5084		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
   5085		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
   5086		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
   5087		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
   5088
   5089		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
   5090		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
   5091		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
   5092
   5093		data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
   5094		data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
   5095		WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
   5096	} else {
   5097		/* Program RLC_CGCG_CGLS_CTRL */
   5098		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
   5099
   5100		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
   5101			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
   5102
   5103		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
   5104			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
   5105
   5106		if (def != data)
   5107			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
   5108
   5109		/* Program RLC_CGCG_CGLS_CTRL_3D */
   5110		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
   5111
   5112		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
   5113			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
   5114		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
   5115			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
   5116
   5117		if (def != data)
   5118			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
   5119
   5120		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
   5121		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
   5122		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
   5123
   5124		data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
   5125		data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
   5126		WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
   5127	}
   5128}
   5129
   5130static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
   5131					    bool enable)
   5132{
   5133	amdgpu_gfx_rlc_enter_safe_mode(adev);
   5134
   5135	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
   5136
   5137	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
   5138
   5139	gfx_v11_0_update_repeater_fgcg(adev, enable);
   5140
   5141	gfx_v11_0_update_sram_fgcg(adev, enable);
   5142
   5143	gfx_v11_0_update_perf_clk(adev, enable);
   5144
   5145	if (adev->cg_flags &
   5146	    (AMD_CG_SUPPORT_GFX_MGCG |
   5147	     AMD_CG_SUPPORT_GFX_CGLS |
   5148	     AMD_CG_SUPPORT_GFX_CGCG |
   5149	     AMD_CG_SUPPORT_GFX_3D_CGCG |
   5150	     AMD_CG_SUPPORT_GFX_3D_CGLS))
    5151		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
   5152
   5153	amdgpu_gfx_rlc_exit_safe_mode(adev);
   5154
   5155	return 0;
   5156}
   5157
   5158static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
   5159{
   5160	u32 reg, data;
   5161
   5162	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
   5163	if (amdgpu_sriov_is_pp_one_vf(adev))
   5164		data = RREG32_NO_KIQ(reg);
   5165	else
   5166		data = RREG32(reg);
   5167
   5168	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
   5169	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
   5170
   5171	if (amdgpu_sriov_is_pp_one_vf(adev))
   5172		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
   5173	else
   5174		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
   5175}
   5176
   5177static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
   5178	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
   5179	.set_safe_mode = gfx_v11_0_set_safe_mode,
   5180	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
   5181	.init = gfx_v11_0_rlc_init,
   5182	.get_csb_size = gfx_v11_0_get_csb_size,
   5183	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
   5184	.resume = gfx_v11_0_rlc_resume,
   5185	.stop = gfx_v11_0_rlc_stop,
   5186	.reset = gfx_v11_0_rlc_reset,
   5187	.start = gfx_v11_0_rlc_start,
   5188	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
   5189};
   5190
   5191static int gfx_v11_0_set_powergating_state(void *handle,
   5192					   enum amd_powergating_state state)
   5193{
   5194	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5195	bool enable = (state == AMD_PG_STATE_GATE);
   5196
   5197	if (amdgpu_sriov_vf(adev))
   5198		return 0;
   5199
   5200	switch (adev->ip_versions[GC_HWIP][0]) {
   5201	case IP_VERSION(11, 0, 0):
   5202	case IP_VERSION(11, 0, 2):
   5203		amdgpu_gfx_off_ctrl(adev, enable);
   5204		break;
   5205	default:
   5206		break;
   5207	}
   5208
   5209	return 0;
   5210}
   5211
   5212static int gfx_v11_0_set_clockgating_state(void *handle,
   5213					  enum amd_clockgating_state state)
   5214{
   5215	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5216
   5217	if (amdgpu_sriov_vf(adev))
    5218		return 0;
   5219
   5220	switch (adev->ip_versions[GC_HWIP][0]) {
   5221	case IP_VERSION(11, 0, 0):
   5222	case IP_VERSION(11, 0, 2):
    5223		gfx_v11_0_update_gfx_clock_gating(adev,
    5224						  state == AMD_CG_STATE_GATE);
    5225		break;
    5226	default:
    5227		break;
   5228	}
   5229
   5230	return 0;
   5231}
   5232
   5233static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
   5234{
   5235	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5236	int data;
   5237
   5238	/* AMD_CG_SUPPORT_GFX_MGCG */
   5239	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
   5240	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
   5241		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
   5242
   5243	/* AMD_CG_SUPPORT_REPEATER_FGCG */
   5244	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
   5245		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
   5246
   5247	/* AMD_CG_SUPPORT_GFX_FGCG */
   5248	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
   5249		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
   5250
   5251	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
   5252	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
   5253		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
   5254
   5255	/* AMD_CG_SUPPORT_GFX_CGCG */
   5256	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
   5257	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
   5258		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
   5259
   5260	/* AMD_CG_SUPPORT_GFX_CGLS */
   5261	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
   5262		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
   5263
   5264	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
   5265	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
   5266	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
   5267		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
   5268
   5269	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
   5270	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
   5271		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
   5272}
   5273
   5274static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
   5275{
    5276	/* gfx11 is 32bit rptr */
   5277	return *(uint32_t *)ring->rptr_cpu_addr;
   5278}
   5279
   5280static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
   5281{
   5282	struct amdgpu_device *adev = ring->adev;
   5283	u64 wptr;
   5284
   5285	/* XXX check if swapping is necessary on BE */
   5286	if (ring->use_doorbell) {
   5287		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
   5288	} else {
   5289		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
   5290		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
   5291	}
   5292
   5293	return wptr;
   5294}
   5295
   5296static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
   5297{
   5298	struct amdgpu_device *adev = ring->adev;
   5299
   5300	if (ring->use_doorbell) {
   5301		/* XXX check if swapping is necessary on BE */
   5302		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
   5303		WDOORBELL64(ring->doorbell_index, ring->wptr);
   5304	} else {
   5305		WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
   5306		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
   5307	}
   5308}
   5309
   5310static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
   5311{
   5312	/* gfx11 hardware is 32bit rptr */
   5313	return *(uint32_t *)ring->rptr_cpu_addr;
   5314}
   5315
   5316static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
   5317{
   5318	u64 wptr;
   5319
   5320	/* XXX check if swapping is necessary on BE */
   5321	if (ring->use_doorbell)
   5322		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
   5323	else
   5324		BUG();
   5325	return wptr;
   5326}
   5327
   5328static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
   5329{
   5330	struct amdgpu_device *adev = ring->adev;
   5331
   5332	/* XXX check if swapping is necessary on BE */
   5333	if (ring->use_doorbell) {
   5334		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
   5335		WDOORBELL64(ring->doorbell_index, ring->wptr);
   5336	} else {
   5337		BUG(); /* only DOORBELL method supported on gfx11 now */
   5338	}
   5339}
   5340
   5341static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
   5342{
   5343	struct amdgpu_device *adev = ring->adev;
   5344	u32 ref_and_mask, reg_mem_engine;
   5345	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
   5346
   5347	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
   5348		switch (ring->me) {
   5349		case 1:
   5350			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
   5351			break;
   5352		case 2:
   5353			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
   5354			break;
   5355		default:
   5356			return;
   5357		}
   5358		reg_mem_engine = 0;
   5359	} else {
   5360		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
   5361		reg_mem_engine = 1; /* pfp */
   5362	}
   5363
   5364	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
   5365			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
   5366			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
   5367			       ref_and_mask, ref_and_mask, 0x20);
   5368}
   5369
   5370static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
   5371				       struct amdgpu_job *job,
   5372				       struct amdgpu_ib *ib,
   5373				       uint32_t flags)
   5374{
   5375	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   5376	u32 header, control = 0;
   5377
   5378	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
   5379
   5380	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
   5381
   5382	control |= ib->length_dw | (vmid << 24);
   5383
   5384	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
   5385		control |= INDIRECT_BUFFER_PRE_ENB(1);
   5386
   5387		if (flags & AMDGPU_IB_PREEMPTED)
   5388			control |= INDIRECT_BUFFER_PRE_RESUME(1);
   5389
   5390		if (vmid)
   5391			gfx_v11_0_ring_emit_de_meta(ring,
   5392				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
   5393	}
   5394
   5395	if (ring->is_mes_queue)
   5396		/* inherit vmid from mqd */
   5397		control |= 0x400000;
   5398
   5399	amdgpu_ring_write(ring, header);
   5400	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
   5401	amdgpu_ring_write(ring,
   5402#ifdef __BIG_ENDIAN
   5403		(2 << 0) |
   5404#endif
   5405		lower_32_bits(ib->gpu_addr));
   5406	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
   5407	amdgpu_ring_write(ring, control);
   5408}
   5409
   5410static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
   5411					   struct amdgpu_job *job,
   5412					   struct amdgpu_ib *ib,
   5413					   uint32_t flags)
   5414{
   5415	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   5416	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
   5417
   5418	if (ring->is_mes_queue)
   5419		/* inherit vmid from mqd */
   5420		control |= 0x40000000;
   5421
   5422	/* Currently, there is a high possibility to get wave ID mismatch
   5423	 * between ME and GDS, leading to a hw deadlock, because ME generates
   5424	 * different wave IDs than the GDS expects. This situation happens
   5425	 * randomly when at least 5 compute pipes use GDS ordered append.
   5426	 * The wave IDs generated by ME are also wrong after suspend/resume.
   5427	 * Those are probably bugs somewhere else in the kernel driver.
   5428	 *
   5429	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
   5430	 * GDS to 0 for this ring (me/pipe).
   5431	 */
   5432	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
   5433		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
   5434		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
   5435		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
   5436	}
   5437
   5438	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
   5439	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
   5440	amdgpu_ring_write(ring,
   5441#ifdef __BIG_ENDIAN
   5442				(2 << 0) |
   5443#endif
   5444				lower_32_bits(ib->gpu_addr));
   5445	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
   5446	amdgpu_ring_write(ring, control);
   5447}
   5448
   5449static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
   5450				     u64 seq, unsigned flags)
   5451{
   5452	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
   5453	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   5454
   5455	/* RELEASE_MEM - flush caches, send int */
   5456	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
   5457	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
   5458				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
   5459				 PACKET3_RELEASE_MEM_GCR_GL2_INV |
   5460				 PACKET3_RELEASE_MEM_GCR_GL2_US |
   5461				 PACKET3_RELEASE_MEM_GCR_GL1_INV |
   5462				 PACKET3_RELEASE_MEM_GCR_GLV_INV |
   5463				 PACKET3_RELEASE_MEM_GCR_GLM_INV |
   5464				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
   5465				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
   5466				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   5467				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
   5468	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
   5469				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
   5470
   5471	/*
    5472	 * the address should be Qword aligned for a 64bit write and Dword
    5473	 * aligned if only the low 32bit data word is sent (data high discarded)
   5474	 */
   5475	if (write64bit)
   5476		BUG_ON(addr & 0x7);
   5477	else
   5478		BUG_ON(addr & 0x3);
   5479	amdgpu_ring_write(ring, lower_32_bits(addr));
   5480	amdgpu_ring_write(ring, upper_32_bits(addr));
   5481	amdgpu_ring_write(ring, lower_32_bits(seq));
   5482	amdgpu_ring_write(ring, upper_32_bits(seq));
   5483	amdgpu_ring_write(ring, ring->is_mes_queue ?
   5484			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
   5485}
   5486
   5487static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
   5488{
   5489	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   5490	uint32_t seq = ring->fence_drv.sync_seq;
   5491	uint64_t addr = ring->fence_drv.gpu_addr;
   5492
   5493	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
   5494			       upper_32_bits(addr), seq, 0xffffffff, 4);
   5495}
   5496
   5497static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
   5498				   uint16_t pasid, uint32_t flush_type,
   5499				   bool all_hub, uint8_t dst_sel)
   5500{
   5501	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
   5502	amdgpu_ring_write(ring,
   5503			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
   5504			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
   5505			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
   5506			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
   5507}
   5508
   5509static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
   5510					 unsigned vmid, uint64_t pd_addr)
   5511{
   5512	if (ring->is_mes_queue)
   5513		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
   5514	else
   5515		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
   5516
   5517	/* compute doesn't have PFP */
   5518	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
   5519		/* sync PFP to ME, otherwise we might get invalid PFP reads */
   5520		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   5521		amdgpu_ring_write(ring, 0x0);
   5522	}
   5523}
   5524
   5525static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
   5526					  u64 seq, unsigned int flags)
   5527{
   5528	struct amdgpu_device *adev = ring->adev;
   5529
   5530	/* we only allocate 32bit for each seq wb address */
   5531	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
   5532
   5533	/* write fence seq to the "addr" */
   5534	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5535	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5536				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
   5537	amdgpu_ring_write(ring, lower_32_bits(addr));
   5538	amdgpu_ring_write(ring, upper_32_bits(addr));
   5539	amdgpu_ring_write(ring, lower_32_bits(seq));
   5540
   5541	if (flags & AMDGPU_FENCE_FLAG_INT) {
   5542		/* set register to trigger INT */
   5543		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5544		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5545					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
   5546		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
   5547		amdgpu_ring_write(ring, 0);
   5548		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
   5549	}
   5550}
   5551
   5552static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
   5553					 uint32_t flags)
   5554{
   5555	uint32_t dw2 = 0;
   5556
    5557	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
   5558	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
   5559		/* set load_global_config & load_global_uconfig */
   5560		dw2 |= 0x8001;
   5561		/* set load_cs_sh_regs */
   5562		dw2 |= 0x01000000;
   5563		/* set load_per_context_state & load_gfx_sh_regs for GFX */
   5564		dw2 |= 0x10002;
   5565	}
   5566
   5567	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   5568	amdgpu_ring_write(ring, dw2);
   5569	amdgpu_ring_write(ring, 0);
   5570}
   5571
   5572static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
   5573{
   5574	unsigned ret;
   5575
   5576	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
   5577	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
   5578	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
   5579	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
   5580	ret = ring->wptr & ring->buf_mask;
   5581	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
   5582
   5583	return ret;
   5584}
   5585
   5586static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
   5587{
   5588	unsigned cur;
   5589	BUG_ON(offset > ring->buf_mask);
   5590	BUG_ON(ring->ring[offset] != 0x55aa55aa);
   5591
   5592	cur = (ring->wptr - 1) & ring->buf_mask;
   5593	if (likely(cur > offset))
   5594		ring->ring[offset] = cur - offset;
   5595	else
   5596		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
   5597}
   5598
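        /*
         * Preempt the gfx ring: ask the KIQ to unmap the queue with
         * PREEMPT_QUEUES_NO_UNMAP and poll the trailing fence until the
         * preemption completes or adev->usec_timeout expires.
         */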
   5599static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
   5600{
   5601	int i, r = 0;
   5602	struct amdgpu_device *adev = ring->adev;
   5603	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
   5604	struct amdgpu_ring *kiq_ring = &kiq->ring;
   5605	unsigned long flags;
   5606
   5607	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
   5608		return -EINVAL;
   5609
   5610	spin_lock_irqsave(&kiq->ring_lock, flags);
   5611
   5612	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
   5613		spin_unlock_irqrestore(&kiq->ring_lock, flags);
   5614		return -ENOMEM;
   5615	}
   5616
   5617	/* assert preemption condition */
   5618	amdgpu_ring_set_preempt_cond_exec(ring, false);
   5619
   5620	/* assert IB preemption, emit the trailing fence */
   5621	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
   5622				   ring->trail_fence_gpu_addr,
   5623				   ++ring->trail_seq);
   5624	amdgpu_ring_commit(kiq_ring);
   5625
   5626	spin_unlock_irqrestore(&kiq->ring_lock, flags);
   5627
   5628	/* poll the trailing fence */
   5629	for (i = 0; i < adev->usec_timeout; i++) {
   5630		if (ring->trail_seq ==
   5631		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
   5632			break;
   5633		udelay(1);
   5634	}
   5635
   5636	if (i >= adev->usec_timeout) {
   5637		r = -EINVAL;
   5638		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
   5639	}
   5640
   5641	/* deassert preemption condition */
   5642	amdgpu_ring_set_preempt_cond_exec(ring, true);
   5643	return r;
   5644}
   5645
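        /*
         * Emit the DE metadata payload (GDS backup address) used for gfx
         * preemption; on resume the payload previously saved in the CSA or
         * MES context is written back instead of a fresh one.
         */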
   5646static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
   5647{
   5648	struct amdgpu_device *adev = ring->adev;
   5649	struct v10_de_ib_state de_payload = {0};
   5650	uint64_t offset, gds_addr, de_payload_gpu_addr;
   5651	void *de_payload_cpu_addr;
   5652	int cnt;
   5653
   5654	if (ring->is_mes_queue) {
   5655		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
   5656				  gfx[0].gfx_meta_data) +
   5657			offsetof(struct v10_gfx_meta_data, de_payload);
   5658		de_payload_gpu_addr =
   5659			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
   5660		de_payload_cpu_addr =
   5661			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
   5662
   5663		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
   5664				  gfx[0].gds_backup) +
   5665			offsetof(struct v10_gfx_meta_data, de_payload);
   5666		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
   5667	} else {
   5668		offset = offsetof(struct v10_gfx_meta_data, de_payload);
   5669		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
   5670		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
   5671
   5672		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
   5673				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
   5674				 PAGE_SIZE);
   5675	}
   5676
   5677	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
   5678	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
   5679
   5680	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
   5681	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
   5682	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
   5683				 WRITE_DATA_DST_SEL(8) |
   5684				 WR_CONFIRM) |
   5685				 WRITE_DATA_CACHE_POLICY(0));
   5686	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
   5687	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
   5688
   5689	if (resume)
   5690		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
   5691					   sizeof(de_payload) >> 2);
   5692	else
   5693		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
   5694					   sizeof(de_payload) >> 2);
   5695}
   5696
   5697static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
   5698				    bool secure)
   5699{
   5700	uint32_t v = secure ? FRAME_TMZ : 0;
   5701
   5702	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
   5703	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
   5704}
   5705
   5706static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
   5707				     uint32_t reg_val_offs)
   5708{
   5709	struct amdgpu_device *adev = ring->adev;
   5710
   5711	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
   5712	amdgpu_ring_write(ring, 0 |	/* src: register*/
   5713				(5 << 8) |	/* dst: memory */
   5714				(1 << 20));	/* write confirm */
   5715	amdgpu_ring_write(ring, reg);
   5716	amdgpu_ring_write(ring, 0);
   5717	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
   5718				reg_val_offs * 4));
   5719	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
   5720				reg_val_offs * 4));
   5721}
   5722
   5723static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
   5724				   uint32_t val)
   5725{
   5726	uint32_t cmd = 0;
   5727
   5728	switch (ring->funcs->type) {
   5729	case AMDGPU_RING_TYPE_GFX:
   5730		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
   5731		break;
   5732	case AMDGPU_RING_TYPE_KIQ:
   5733		cmd = (1 << 16); /* no inc addr */
   5734		break;
   5735	default:
   5736		cmd = WR_CONFIRM;
   5737		break;
   5738	}
   5739	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5740	amdgpu_ring_write(ring, cmd);
   5741	amdgpu_ring_write(ring, reg);
   5742	amdgpu_ring_write(ring, 0);
   5743	amdgpu_ring_write(ring, val);
   5744}
   5745
   5746static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
   5747					uint32_t val, uint32_t mask)
   5748{
   5749	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
   5750}
   5751
   5752static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
   5753						   uint32_t reg0, uint32_t reg1,
   5754						   uint32_t ref, uint32_t mask)
   5755{
   5756	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   5757
   5758	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
   5759			       ref, mask, 0x20);
   5760}
   5761
   5762static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
   5763					 unsigned vmid)
   5764{
   5765	struct amdgpu_device *adev = ring->adev;
   5766	uint32_t value = 0;
   5767
   5768	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
   5769	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
   5770	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
   5771	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
   5772	WREG32_SOC15(GC, 0, regSQ_CMD, value);
   5773}
   5774
   5775static void
   5776gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
   5777				      uint32_t me, uint32_t pipe,
   5778				      enum amdgpu_interrupt_state state)
   5779{
   5780	uint32_t cp_int_cntl, cp_int_cntl_reg;
   5781
   5782	if (!me) {
   5783		switch (pipe) {
   5784		case 0:
   5785			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
   5786			break;
   5787		case 1:
   5788			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
   5789			break;
   5790		default:
   5791			DRM_DEBUG("invalid pipe %d\n", pipe);
   5792			return;
   5793		}
   5794	} else {
   5795		DRM_DEBUG("invalid me %d\n", me);
   5796		return;
   5797	}
   5798
   5799	switch (state) {
   5800	case AMDGPU_IRQ_STATE_DISABLE:
   5801		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
   5802		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
   5803					    TIME_STAMP_INT_ENABLE, 0);
   5804		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
   5805					    GENERIC0_INT_ENABLE, 0);
   5806		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
   5807		break;
   5808	case AMDGPU_IRQ_STATE_ENABLE:
   5809		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
   5810		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
   5811					    TIME_STAMP_INT_ENABLE, 1);
   5812		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
   5813					    GENERIC0_INT_ENABLE, 1);
   5814		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
   5815		break;
   5816	default:
   5817		break;
   5818	}
   5819}
   5820
   5821static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
   5822						     int me, int pipe,
   5823						     enum amdgpu_interrupt_state state)
   5824{
   5825	u32 mec_int_cntl, mec_int_cntl_reg;
   5826
   5827	/*
   5828	 * amdgpu controls only the first MEC. That's why this function only
   5829	 * handles the setting of interrupts for this specific MEC. All other
   5830	 * pipes' interrupts are set by amdkfd.
   5831	 */
   5832
   5833	if (me == 1) {
   5834		switch (pipe) {
   5835		case 0:
   5836			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
   5837			break;
   5838		case 1:
   5839			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
   5840			break;
   5841		case 2:
   5842			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
   5843			break;
   5844		case 3:
   5845			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
   5846			break;
   5847		default:
   5848			DRM_DEBUG("invalid pipe %d\n", pipe);
   5849			return;
   5850		}
   5851	} else {
   5852		DRM_DEBUG("invalid me %d\n", me);
   5853		return;
   5854	}
   5855
   5856	switch (state) {
   5857	case AMDGPU_IRQ_STATE_DISABLE:
   5858		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
   5859		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
   5860					     TIME_STAMP_INT_ENABLE, 0);
   5861		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
   5862					     GENERIC0_INT_ENABLE, 0);
   5863		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
   5864		break;
   5865	case AMDGPU_IRQ_STATE_ENABLE:
   5866		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
   5867		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
   5868					     TIME_STAMP_INT_ENABLE, 1);
   5869		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
   5870					     GENERIC0_INT_ENABLE, 1);
   5871		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
   5872		break;
   5873	default:
   5874		break;
   5875	}
   5876}
   5877
   5878static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
   5879					    struct amdgpu_irq_src *src,
   5880					    unsigned type,
   5881					    enum amdgpu_interrupt_state state)
   5882{
   5883	switch (type) {
   5884	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
   5885		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
   5886		break;
   5887	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
   5888		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
   5889		break;
   5890	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
   5891		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
   5892		break;
   5893	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
   5894		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
   5895		break;
   5896	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
   5897		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
   5898		break;
   5899	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
   5900		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
   5901		break;
   5902	default:
   5903		break;
   5904	}
   5905	return 0;
   5906}
   5907
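        /*
         * EOP interrupt handler: process the fence either on the MES queue
         * that signalled it or on the gfx/compute ring matching the
         * me/pipe/queue decoded from the IV ring_id.
         */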
   5908static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
   5909			     struct amdgpu_irq_src *source,
   5910			     struct amdgpu_iv_entry *entry)
   5911{
   5912	int i;
   5913	u8 me_id, pipe_id, queue_id;
   5914	struct amdgpu_ring *ring;
   5915	uint32_t mes_queue_id = entry->src_data[0];
   5916
   5917	DRM_DEBUG("IH: CP EOP\n");
   5918
   5919	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
   5920		struct amdgpu_mes_queue *queue;
   5921
   5922		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
   5923
   5924		spin_lock(&adev->mes.queue_id_lock);
   5925		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
   5926		if (queue) {
   5927			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
   5928			amdgpu_fence_process(queue->ring);
   5929		}
   5930		spin_unlock(&adev->mes.queue_id_lock);
   5931	} else {
   5932		me_id = (entry->ring_id & 0x0c) >> 2;
   5933		pipe_id = (entry->ring_id & 0x03) >> 0;
   5934		queue_id = (entry->ring_id & 0x70) >> 4;
   5935
   5936		switch (me_id) {
   5937		case 0:
   5938			if (pipe_id == 0)
   5939				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
   5940			else
   5941				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
   5942			break;
   5943		case 1:
   5944		case 2:
   5945			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   5946				ring = &adev->gfx.compute_ring[i];
   5947				/* Per-queue interrupt is supported for MEC starting from VI.
   5948				 * The interrupt can only be enabled/disabled per pipe instead
   5949				 * of per queue.
   5950				 */
   5951				if ((ring->me == me_id) &&
   5952				    (ring->pipe == pipe_id) &&
   5953				    (ring->queue == queue_id))
   5954					amdgpu_fence_process(ring);
   5955			}
   5956			break;
   5957		}
   5958	}
   5959
   5960	return 0;
   5961}
   5962
   5963static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
   5964					      struct amdgpu_irq_src *source,
   5965					      unsigned type,
   5966					      enum amdgpu_interrupt_state state)
   5967{
   5968	switch (state) {
   5969	case AMDGPU_IRQ_STATE_DISABLE:
   5970	case AMDGPU_IRQ_STATE_ENABLE:
   5971		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
   5972			       PRIV_REG_INT_ENABLE,
   5973			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
   5974		break;
   5975	default:
   5976		break;
   5977	}
   5978
   5979	return 0;
   5980}
   5981
   5982static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
   5983					       struct amdgpu_irq_src *source,
   5984					       unsigned type,
   5985					       enum amdgpu_interrupt_state state)
   5986{
   5987	switch (state) {
   5988	case AMDGPU_IRQ_STATE_DISABLE:
   5989	case AMDGPU_IRQ_STATE_ENABLE:
   5990		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
   5991			       PRIV_INSTR_INT_ENABLE,
   5992			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
   5993		break;
   5994	default:
   5995		break;
   5996	}
   5997
   5998	return 0;
   5999}
   6000
   6001static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
   6002					struct amdgpu_iv_entry *entry)
   6003{
   6004	u8 me_id, pipe_id, queue_id;
   6005	struct amdgpu_ring *ring;
   6006	int i;
   6007
   6008	me_id = (entry->ring_id & 0x0c) >> 2;
   6009	pipe_id = (entry->ring_id & 0x03) >> 0;
   6010	queue_id = (entry->ring_id & 0x70) >> 4;
   6011
   6012	switch (me_id) {
   6013	case 0:
   6014		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   6015			ring = &adev->gfx.gfx_ring[i];
    6016			/* only 1 gfx queue per pipe is enabled for now */
   6017			if (ring->me == me_id && ring->pipe == pipe_id)
   6018				drm_sched_fault(&ring->sched);
   6019		}
   6020		break;
   6021	case 1:
   6022	case 2:
   6023		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   6024			ring = &adev->gfx.compute_ring[i];
   6025			if (ring->me == me_id && ring->pipe == pipe_id &&
   6026			    ring->queue == queue_id)
   6027				drm_sched_fault(&ring->sched);
   6028		}
   6029		break;
   6030	default:
   6031		BUG();
   6032		break;
   6033	}
   6034}
   6035
   6036static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
   6037				  struct amdgpu_irq_src *source,
   6038				  struct amdgpu_iv_entry *entry)
   6039{
   6040	DRM_ERROR("Illegal register access in command stream\n");
   6041	gfx_v11_0_handle_priv_fault(adev, entry);
   6042	return 0;
   6043}
   6044
   6045static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
   6046				   struct amdgpu_irq_src *source,
   6047				   struct amdgpu_iv_entry *entry)
   6048{
   6049	DRM_ERROR("Illegal instruction in command stream\n");
   6050	gfx_v11_0_handle_priv_fault(adev, entry);
   6051	return 0;
   6052}
   6053
   6054#if 0
   6055static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
   6056					     struct amdgpu_irq_src *src,
   6057					     unsigned int type,
   6058					     enum amdgpu_interrupt_state state)
   6059{
   6060	uint32_t tmp, target;
   6061	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
   6062
   6063	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
   6064	target += ring->pipe;
   6065
   6066	switch (type) {
   6067	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
   6068		if (state == AMDGPU_IRQ_STATE_DISABLE) {
   6069			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
   6070			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
   6071					    GENERIC2_INT_ENABLE, 0);
   6072			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
   6073
   6074			tmp = RREG32_SOC15_IP(GC, target);
   6075			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
   6076					    GENERIC2_INT_ENABLE, 0);
   6077			WREG32_SOC15_IP(GC, target, tmp);
   6078		} else {
   6079			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
   6080			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
   6081					    GENERIC2_INT_ENABLE, 1);
   6082			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
   6083
   6084			tmp = RREG32_SOC15_IP(GC, target);
   6085			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
   6086					    GENERIC2_INT_ENABLE, 1);
   6087			WREG32_SOC15_IP(GC, target, tmp);
   6088		}
   6089		break;
   6090	default:
    6091		BUG(); /* KIQ only supports GENERIC2_INT for now */
   6092		break;
   6093	}
   6094	return 0;
   6095}
   6096#endif
   6097
   6098static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
   6099{
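        	/* Request a flush of the whole GC cache hierarchy: write back and
        	 * invalidate GL2 and GLM, and invalidate GL1, GLV, GLK and GLI,
        	 * as encoded in the GCR_CNTL fields below.
        	 */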
   6100	const unsigned int gcr_cntl =
   6101			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
   6102			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
   6103			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
   6104			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
   6105			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
   6106			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
   6107			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
   6108			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
   6109
   6110	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
   6111	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
   6112	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
   6113	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
   6114	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
   6115	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
   6116	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
   6117	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
   6118	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
   6119}
   6120
   6121static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
   6122	.name = "gfx_v11_0",
   6123	.early_init = gfx_v11_0_early_init,
   6124	.late_init = gfx_v11_0_late_init,
   6125	.sw_init = gfx_v11_0_sw_init,
   6126	.sw_fini = gfx_v11_0_sw_fini,
   6127	.hw_init = gfx_v11_0_hw_init,
   6128	.hw_fini = gfx_v11_0_hw_fini,
   6129	.suspend = gfx_v11_0_suspend,
   6130	.resume = gfx_v11_0_resume,
   6131	.is_idle = gfx_v11_0_is_idle,
   6132	.wait_for_idle = gfx_v11_0_wait_for_idle,
   6133	.soft_reset = gfx_v11_0_soft_reset,
   6134	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
   6135	.set_powergating_state = gfx_v11_0_set_powergating_state,
   6136	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
   6137};
   6138
   6139static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
   6140	.type = AMDGPU_RING_TYPE_GFX,
   6141	.align_mask = 0xff,
   6142	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6143	.support_64bit_ptrs = true,
   6144	.vmhub = AMDGPU_GFXHUB_0,
   6145	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
   6146	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
   6147	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
    6148	.emit_frame_size = /* 242 dwords maximum in total if 16 IBs are used */
   6149		5 + /* COND_EXEC */
   6150		7 + /* PIPELINE_SYNC */
   6151		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
   6152		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
   6153		2 + /* VM_FLUSH */
   6154		8 + /* FENCE for VM_FLUSH */
   6155		20 + /* GDS switch */
   6156		5 + /* COND_EXEC */
   6157		7 + /* HDP_flush */
   6158		4 + /* VGT_flush */
    6159		31 + /* DE_META */
   6160		3 + /* CNTX_CTRL */
   6161		5 + /* HDP_INVL */
   6162		8 + 8 + /* FENCE x2 */
   6163		8, /* gfx_v11_0_emit_mem_sync */
   6164	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
   6165	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
   6166	.emit_fence = gfx_v11_0_ring_emit_fence,
   6167	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
   6168	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
   6169	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
   6170	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
   6171	.test_ring = gfx_v11_0_ring_test_ring,
   6172	.test_ib = gfx_v11_0_ring_test_ib,
   6173	.insert_nop = amdgpu_ring_insert_nop,
   6174	.pad_ib = amdgpu_ring_generic_pad_ib,
   6175	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
   6176	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
   6177	.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
   6178	.preempt_ib = gfx_v11_0_ring_preempt_ib,
   6179	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
   6180	.emit_wreg = gfx_v11_0_ring_emit_wreg,
   6181	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
   6182	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
   6183	.soft_recovery = gfx_v11_0_ring_soft_recovery,
   6184	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
   6185};
   6186
   6187static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
   6188	.type = AMDGPU_RING_TYPE_COMPUTE,
   6189	.align_mask = 0xff,
   6190	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6191	.support_64bit_ptrs = true,
   6192	.vmhub = AMDGPU_GFXHUB_0,
   6193	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
   6194	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
   6195	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
   6196	.emit_frame_size =
   6197		20 + /* gfx_v11_0_ring_emit_gds_switch */
   6198		7 + /* gfx_v11_0_ring_emit_hdp_flush */
   6199		5 + /* hdp invalidate */
   6200		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
   6201		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
   6202		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
   6203		2 + /* gfx_v11_0_ring_emit_vm_flush */
   6204		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
   6205		8, /* gfx_v11_0_emit_mem_sync */
   6206	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
   6207	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
   6208	.emit_fence = gfx_v11_0_ring_emit_fence,
   6209	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
   6210	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
   6211	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
   6212	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
   6213	.test_ring = gfx_v11_0_ring_test_ring,
   6214	.test_ib = gfx_v11_0_ring_test_ib,
   6215	.insert_nop = amdgpu_ring_insert_nop,
   6216	.pad_ib = amdgpu_ring_generic_pad_ib,
   6217	.emit_wreg = gfx_v11_0_ring_emit_wreg,
   6218	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
   6219	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
   6220	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
   6221};
   6222
   6223static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
   6224	.type = AMDGPU_RING_TYPE_KIQ,
   6225	.align_mask = 0xff,
   6226	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6227	.support_64bit_ptrs = true,
   6228	.vmhub = AMDGPU_GFXHUB_0,
   6229	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
   6230	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
   6231	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
   6232	.emit_frame_size =
   6233		20 + /* gfx_v11_0_ring_emit_gds_switch */
   6234		7 + /* gfx_v11_0_ring_emit_hdp_flush */
    6235		5 + /* hdp invalidate */
   6236		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
   6237		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
   6238		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
   6239		2 + /* gfx_v11_0_ring_emit_vm_flush */
   6240		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
   6241	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
   6242	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
   6243	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
   6244	.test_ring = gfx_v11_0_ring_test_ring,
   6245	.test_ib = gfx_v11_0_ring_test_ib,
   6246	.insert_nop = amdgpu_ring_insert_nop,
   6247	.pad_ib = amdgpu_ring_generic_pad_ib,
   6248	.emit_rreg = gfx_v11_0_ring_emit_rreg,
   6249	.emit_wreg = gfx_v11_0_ring_emit_wreg,
   6250	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
   6251	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
   6252};
   6253
   6254static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
   6255{
   6256	int i;
   6257
   6258	adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
   6259
   6260	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   6261		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
   6262
   6263	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   6264		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
   6265}
   6266
   6267static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
   6268	.set = gfx_v11_0_set_eop_interrupt_state,
   6269	.process = gfx_v11_0_eop_irq,
   6270};
   6271
   6272static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
   6273	.set = gfx_v11_0_set_priv_reg_fault_state,
   6274	.process = gfx_v11_0_priv_reg_irq,
   6275};
   6276
   6277static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
   6278	.set = gfx_v11_0_set_priv_inst_fault_state,
   6279	.process = gfx_v11_0_priv_inst_irq,
   6280};
   6281
   6282static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
   6283{
   6284	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
   6285	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
   6286
   6287	adev->gfx.priv_reg_irq.num_types = 1;
   6288	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
   6289
   6290	adev->gfx.priv_inst_irq.num_types = 1;
   6291	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
   6292}
   6293
   6294static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
   6295{
   6296	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
   6297}
   6298
   6299static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
   6300{
   6301	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
   6302}
   6303
   6304static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
   6305{
   6306	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
   6307			    adev->gfx.config.max_sh_per_se *
   6308			    adev->gfx.config.max_shader_engines;
   6309
   6310	adev->gds.gds_size = 0x1000;
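        	/* 32 wave ids per CU, so the highest valid id is total_cu * 32 - 1 */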
   6311	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
   6312	adev->gds.gws_size = 64;
   6313	adev->gds.oa_size = 16;
   6314}
   6315
   6316static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
   6317{
   6318	/* set gfx eng mqd */
   6319	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
   6320		sizeof(struct v11_gfx_mqd);
   6321	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
   6322		gfx_v11_0_gfx_mqd_init;
   6323	/* set compute eng mqd */
   6324	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
   6325		sizeof(struct v11_compute_mqd);
   6326	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
   6327		gfx_v11_0_compute_mqd_init;
   6328}
   6329
   6330static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
   6331							  u32 bitmap)
   6332{
   6333	u32 data;
   6334
   6335	if (!bitmap)
   6336		return;
   6337
   6338	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
   6339	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
   6340
   6341	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
   6342}
   6343
   6344static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
   6345{
   6346	u32 data, wgp_bitmask;
   6347	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
   6348	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
   6349
   6350	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
   6351	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
   6352
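        	/* one WGP holds two CUs, so there are max_cu_per_sh / 2 WGP bits per SH */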
   6353	wgp_bitmask =
   6354		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
   6355
   6356	return (~data) & wgp_bitmask;
   6357}
   6358
   6359static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
   6360{
   6361	u32 wgp_idx, wgp_active_bitmap;
   6362	u32 cu_bitmap_per_wgp, cu_active_bitmap;
   6363
   6364	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
   6365	cu_active_bitmap = 0;
   6366
   6367	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
    6368		/* each enabled WGP corresponds to 2 enabled CUs */
   6369		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
   6370		if (wgp_active_bitmap & (1 << wgp_idx))
   6371			cu_active_bitmap |= cu_bitmap_per_wgp;
   6372	}
   6373
   6374	return cu_active_bitmap;
   6375}
   6376
   6377static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
   6378				 struct amdgpu_cu_info *cu_info)
   6379{
   6380	int i, j, k, counter, active_cu_number = 0;
   6381	u32 mask, bitmap;
   6382	unsigned disable_masks[8 * 2];
   6383
   6384	if (!adev || !cu_info)
   6385		return -EINVAL;
   6386
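        	/* one user-supplied disable mask per {SE, SH} pair, for up to 8 SEs
        	 * with 2 SHs each; indexed as disable_masks[se * 2 + sh] below.
        	 */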
   6387	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
   6388
   6389	mutex_lock(&adev->grbm_idx_mutex);
   6390	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   6391		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   6392			mask = 1;
   6393			counter = 0;
   6394			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
   6395			if (i < 8 && j < 2)
   6396				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
   6397					adev, disable_masks[i * 2 + j]);
   6398			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
   6399
   6400			/**
    6401			 * GFX11 can support more than 4 SEs, while the bitmap
    6402			 * in the cu_info struct is 4x4 and the ioctl interface struct
    6403			 * drm_amdgpu_info_device must stay stable.
    6404			 * So we use the last two columns of the bitmap to store the CU
    6405			 * masks for SEs 4 to 7; the layout of the bitmap is as follows:
   6406			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
   6407			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
   6408			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
   6409			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
   6410			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
   6411			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
   6412			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
   6413			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
   6414			 */
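        			/* e.g. i = 5, j = 1 (SE5/SH1) lands in
        			 * bitmap[5 % 4][1 + (5 / 4) * 2] = bitmap[1][3],
        			 * matching the table above.
        			 */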
   6415			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
   6416
   6417			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
   6418				if (bitmap & mask)
   6419					counter++;
   6420
   6421				mask <<= 1;
   6422			}
   6423			active_cu_number += counter;
   6424		}
   6425	}
   6426	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   6427	mutex_unlock(&adev->grbm_idx_mutex);
   6428
   6429	cu_info->number = active_cu_number;
   6430	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
   6431
   6432	return 0;
   6433}
   6434
   6435const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
   6436{
   6437	.type = AMD_IP_BLOCK_TYPE_GFX,
   6438	.major = 11,
   6439	.minor = 0,
   6440	.rev = 0,
   6441	.funcs = &gfx_v11_0_ip_funcs,
   6442};