cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mes_v10_1.c (34920B)


/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
#include "soc15_common.h"
#include "nv.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "gc/gc_10_1_0_default.h"
#include "v10_structs.h"
#include "mes_api_def.h"

#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid               0x2820
#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX      1
#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid		0x4ca1
#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX	1

MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");

static int mes_v10_1_hw_fini(void *handle);
static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);

#define MES_EOP_SIZE   2048

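/*
 * The MES ring is driven purely through its doorbell: set_wptr mirrors
 * the write pointer into a CPU-visible buffer and then rings the 64-bit
 * doorbell. There is no register-based fallback, hence the BUG()s in
 * the non-doorbell paths below.
 */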
static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG();
	}
}

static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}

static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr;

	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
	else
		BUG();
	return wptr;
}

static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_MES,
	.align_mask = 1,
	.nop = 0,
	.support_64bit_ptrs = true,
	.get_rptr = mes_v10_1_ring_get_rptr,
	.get_wptr = mes_v10_1_ring_get_wptr,
	.set_wptr = mes_v10_1_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};

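/*
 * Write an MES API packet into the MES ring and poll until the firmware
 * acknowledges it. Each packet carries a fence address/value pair in its
 * api_status; the firmware writes the value back on completion, so
 * completion is detected with the ring's polling fence wait. All MES API
 * helpers below funnel through here.
 */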
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
						    void *pkt, int size)
{
	int ndw = size / 4;
	signed long r;
	union MESAPI__ADD_QUEUE *x_pkt = pkt;
	struct amdgpu_device *adev = mes->adev;
	struct amdgpu_ring *ring = &mes->ring;

	BUG_ON(size % 4 != 0);

	if (amdgpu_ring_alloc(ring, ndw))
		return -ENOMEM;

	amdgpu_ring_write_multiple(ring, pkt, ndw);
	amdgpu_ring_commit(ring);

	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);

	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
				      adev->usec_timeout);
	if (r < 1) {
		DRM_ERROR("MES failed to respond to msg=%d\n",
			  x_pkt->header.opcode);
		return -ETIMEDOUT;
	}

	return 0;
}

static int convert_to_mes_queue_type(int queue_type)
{
	if (queue_type == AMDGPU_RING_TYPE_GFX)
		return MES_QUEUE_TYPE_GFX;
	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
		return MES_QUEUE_TYPE_COMPUTE;
	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
		return MES_QUEUE_TYPE_SDMA;
	else
		BUG();
	return -1;
}

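/*
 * Translate a mes_add_queue_input into an ADD_QUEUE scheduler packet.
 * The packet hands MES everything it needs to map the queue on its own:
 * page table base, process/gang context addresses and quanta, doorbell
 * offset, MQD and wptr addresses, and the queue type.
 */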
static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
				  struct mes_add_queue_input *input)
{
	struct amdgpu_device *adev = mes->adev;
	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
	uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;

	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));

	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_add_queue_pkt.process_id = input->process_id;
	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
	mes_add_queue_pkt.process_va_start = input->process_va_start;
	mes_add_queue_pkt.process_va_end = input->process_va_end;
	mes_add_queue_pkt.process_quantum = input->process_quantum;
	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
	mes_add_queue_pkt.inprocess_gang_priority =
		input->inprocess_gang_priority;
	mes_add_queue_pkt.gang_global_priority_level =
		input->gang_global_priority_level;
	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
	mes_add_queue_pkt.wptr_addr = input->wptr_addr;
	mes_add_queue_pkt.queue_type =
		convert_to_mes_queue_type(input->queue_type);
	mes_add_queue_pkt.paging = input->paging;
	mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
	mes_add_queue_pkt.gws_base = input->gws_base;
	mes_add_queue_pkt.gws_size = input->gws_size;
	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;

	mes_add_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_add_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
}

static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
				     struct mes_remove_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;

	mes_remove_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_remove_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}

static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
				 struct mes_unmap_legacy_queue_input *input)
{
	union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;

	memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));

	mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
	mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
	mes_remove_queue_pkt.gang_context_addr = 0;

	mes_remove_queue_pkt.pipe_id = input->pipe_id;
	mes_remove_queue_pkt.queue_id = input->queue_id;

	if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
		mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
		mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
		mes_remove_queue_pkt.tf_data =
			lower_32_bits(input->trail_fence_data);
	} else {
		if (input->queue_type == AMDGPU_RING_TYPE_GFX)
			mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
		else
			mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
	}

	mes_remove_queue_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_remove_queue_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}

static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
				  struct mes_suspend_gang_input *input)
{
	return 0;
}

static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
				 struct mes_resume_gang_input *input)
{
	return 0;
}

static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
{
	union MESAPI__QUERY_MES_STATUS mes_status_pkt;

	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));

	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_status_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_status_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_status_pkt, sizeof(mes_status_pkt));
}

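/*
 * Declare the hardware resources owned by the scheduler firmware: the
 * VMID masks for both hubs, the per-pipe HQD masks for compute, gfx and
 * SDMA, the aggregated doorbells, and the gc/mmhub/osssys register
 * aperture bases MES may program directly.
 */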
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
{
	int i;
	struct amdgpu_device *adev = mes->adev;
	union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;

	memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));

	mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
	mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
	mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;

	mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
	mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
	mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
	mes_set_hw_res_pkt.paging_vmid = 0;
	mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
	mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
		mes->query_status_fence_gpu_addr;

	for (i = 0; i < MAX_COMPUTE_PIPES; i++)
		mes_set_hw_res_pkt.compute_hqd_mask[i] =
			mes->compute_hqd_mask[i];

	for (i = 0; i < MAX_GFX_PIPES; i++)
		mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];

	for (i = 0; i < MAX_SDMA_PIPES; i++)
		mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];

	for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
		mes_set_hw_res_pkt.aggregated_doorbells[i] =
			mes->agreegated_doorbells[i];

	for (i = 0; i < 5; i++) {
		mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
		mes_set_hw_res_pkt.mmhub_base[i] =
			adev->reg_offset[MMHUB_HWIP][0][i];
		mes_set_hw_res_pkt.osssys_base[i] =
			adev->reg_offset[OSSSYS_HWIP][0][i];
	}

	mes_set_hw_res_pkt.disable_reset = 1;
	mes_set_hw_res_pkt.disable_mes_log = 1;
	mes_set_hw_res_pkt.use_different_vmid_compute = 1;

	mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
		mes->ring.fence_drv.gpu_addr;
	mes_set_hw_res_pkt.api_status.api_completion_fence_value =
		++mes->ring.fence_drv.sync_seq;

	return mes_v10_1_submit_pkt_and_poll_completion(mes,
			&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
}

static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
	.add_hw_queue = mes_v10_1_add_hw_queue,
	.remove_hw_queue = mes_v10_1_remove_hw_queue,
	.unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
	.suspend_gang = mes_v10_1_suspend_gang,
	.resume_gang = mes_v10_1_resume_gang,
};

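/*
 * Fetch and validate the MES firmware image for one pipe. The scheduler
 * pipe loads <chip>_mes.bin and the KIQ pipe <chip>_mes1.bin, matching
 * the MODULE_FIRMWARE() declarations above. For PSP (front-door)
 * loading, the ucode and ucode-data images are also registered in the
 * PSP ucode list.
 */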
static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
				    enum admgpu_mes_pipe pipe)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	struct amdgpu_firmware_info *info;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 1, 10):
		chip_name = "navi10";
		break;
	case IP_VERSION(10, 3, 0):
		chip_name = "sienna_cichlid";
		break;
	default:
		BUG();
	}

	if (pipe == AMDGPU_MES_SCHED_PIPE)
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
			 chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes1.bin",
			 chip_name);

	err = request_firmware(&adev->mes.fw[pipe], fw_name, adev->dev);
	if (err)
		return err;

	err = amdgpu_ucode_validate(adev->mes.fw[pipe]);
	if (err) {
		release_firmware(adev->mes.fw[pipe]);
		adev->mes.fw[pipe] = NULL;
		return err;
	}

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;
	adev->mes.ucode_fw_version[pipe] =
		le32_to_cpu(mes_hdr->mes_ucode_version);
	adev->mes.data_fw_version[pipe] =
		le32_to_cpu(mes_hdr->mes_ucode_data_version);
	adev->mes.uc_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
	adev->mes.data_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		int ucode, ucode_data;

		if (pipe == AMDGPU_MES_SCHED_PIPE) {
			ucode = AMDGPU_UCODE_ID_CP_MES;
			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
		} else {
			ucode = AMDGPU_UCODE_ID_CP_MES1;
			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
		}

		info = &adev->firmware.ucode[ucode];
		info->ucode_id = ucode;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
			      PAGE_SIZE);

		info = &adev->firmware.ucode[ucode_data];
		info->ucode_id = ucode_data;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
			      PAGE_SIZE);
	}

	return 0;
}

static void mes_v10_1_free_microcode(struct amdgpu_device *adev,
				     enum admgpu_mes_pipe pipe)
{
	release_firmware(adev->mes.fw[pipe]);
	adev->mes.fw[pipe] = NULL;
}

static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev,
					   enum admgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.ucode_fw_obj[pipe],
				      &adev->mes.ucode_fw_gpu_addr[pipe],
				      (void **)&adev->mes.ucode_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);

	return 0;
}

static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
						enum admgpu_mes_pipe pipe)
{
	int r;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	const __le32 *fw_data;
	unsigned fw_size;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;

	fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
		   le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->mes.data_fw_obj[pipe],
				      &adev->mes.data_fw_gpu_addr[pipe],
				      (void **)&adev->mes.data_fw_ptr[pipe]);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
		return r;
	}

	memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);

	amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);

	return 0;
}

static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev,
					 enum admgpu_mes_pipe pipe)
{
	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
			      &adev->mes.data_fw_gpu_addr[pipe],
			      (void **)&adev->mes.data_fw_ptr[pipe]);

	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
			      &adev->mes.ucode_fw_gpu_addr[pipe],
			      (void **)&adev->mes.ucode_fw_ptr[pipe]);
}

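/*
 * On enable: put the pipes into reset, point CP_MES_PRGRM_CNTR_START at
 * the ucode entry point for each active pipe (me 3 selects the MES
 * engine), then release the resets and set the ACTIVE bits. On disable:
 * deactivate both pipes, invalidate the instruction cache and halt MES.
 */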
static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
{
	uint32_t pipe, data = 0;

	if (enable) {
		data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
			     MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);

		mutex_lock(&adev->srbm_mutex);
		for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
			if (!adev->enable_mes_kiq &&
			    pipe == AMDGPU_MES_KIQ_PIPE)
				continue;

			nv_grbm_select(adev, 3, pipe, 0, 0);
			WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
			     (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
		}
		nv_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
		data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
		data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
				     BYPASS_UNCACHED, 0);
		WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);

		/* unhalt MES and activate pipe0 */
		data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
				     adev->enable_mes_kiq ? 1 : 0);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
		udelay(100);
	} else {
		data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL,
				     MES_INVALIDATE_ICACHE, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
				     adev->enable_mes_kiq ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
		WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
	}
}

/* This function is for backdoor loading of the MES firmware: the driver
 * copies the ucode images into GTT buffers and programs the instruction
 * and data cache bases itself instead of going through the PSP front
 * door.
 */
static int mes_v10_1_load_microcode(struct amdgpu_device *adev,
				    enum admgpu_mes_pipe pipe)
{
	int r;
	uint32_t data;

	mes_v10_1_enable(adev, false);

	if (!adev->mes.fw[pipe])
		return -EINVAL;

	r = mes_v10_1_allocate_ucode_buffer(adev, pipe);
	if (r)
		return r;

	r = mes_v10_1_allocate_ucode_data_buffer(adev, pipe);
	if (r) {
		mes_v10_1_free_ucode_buffers(adev, pipe);
		return r;
	}

	WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);

	mutex_lock(&adev->srbm_mutex);
	/* me=3, queue=0; pipe depends on the MES pipe being loaded */
	nv_grbm_select(adev, 3, pipe, 0, 0);

	/* set ucode start address */
	WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
		     (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);

	/* set ucode firmware address */
	WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
		     lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
		     upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));

	/* set ucode instruction cache boundary to 2M-1 */
	WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);

	/* set ucode data firmware address */
	WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
		     lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
	WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
		     upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));

	/* set ucode data cache boundary to 256K-1 */
	WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);

	/* invalidate ICACHE */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 3, 0):
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
		break;
	default:
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
		break;
	}
	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 3, 0):
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
		break;
	default:
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
		break;
	}

	/* prime the ICACHE. */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 3, 0):
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
		break;
	default:
		data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
		break;
	}
	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 3, 0):
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
		break;
	default:
		WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
		break;
	}

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	return 0;
}

static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev,
				      enum admgpu_mes_pipe pipe)
{
	int r;
	u32 *eop;

	r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
			      AMDGPU_GEM_DOMAIN_GTT,
			      &adev->mes.eop_gpu_obj[pipe],
			      &adev->mes.eop_gpu_addr[pipe],
			      (void **)&eop);
	if (r) {
		dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
		return r;
	}

	memset(eop, 0, adev->mes.eop_gpu_obj[pipe]->tbo.base.size);

	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);

	return 0;
}

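/*
 * Fill the memory queue descriptor (MQD) for the MES ring, using the
 * same v10_compute_mqd layout as compute queues: EOP buffer, MQD and
 * ring base addresses, rptr/wptr writeback addresses, PQ control and
 * doorbell control. The queue is later mapped from this MQD via the
 * KIQ rather than by direct HQD register writes.
 */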
static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
{
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = ring->eop_gpu_addr >> 8;

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = mmCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MES_EOP_SIZE / 4) - 1));

	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
	mqd->cp_hqd_eop_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = mmCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = mmCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* enable doorbell? */
	tmp = 0;
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_hqd_pq_doorbell_control = tmp;

	mqd->cp_hqd_vmid = 0;
	/* activate the queue */
	mqd->cp_hqd_active = 1;
	mqd->cp_hqd_persistent_state = mmCP_HQD_PERSISTENT_STATE_DEFAULT;
	mqd->cp_hqd_ib_control = mmCP_HQD_IB_CONTROL_DEFAULT;
	mqd->cp_hqd_iq_timer = mmCP_HQD_IQ_TIMER_DEFAULT;
	mqd->cp_hqd_quantum = mmCP_HQD_QUANTUM_DEFAULT;

	tmp = mmCP_HQD_GFX_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_GFX_CONTROL, DB_UPDATED_MSG_EN, 1);
	/* offset: 184 - this is used for CP_HQD_GFX_CONTROL */
	mqd->cp_hqd_suspend_cntl_stack_offset = tmp;

	return 0;
}

#if 0
static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
{
	struct v10_compute_mqd *mqd = ring->mqd_ptr;
	struct amdgpu_device *adev = ring->adev;
	uint32_t data = 0;

	mutex_lock(&adev->srbm_mutex);
	nv_grbm_select(adev, 3, ring->pipe, 0, 0);

	/* set CP_HQD_VMID.VMID = 0. */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
	data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);

	/* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
	data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
			     DOORBELL_EN, 0);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);

	/* set CP_MQD_BASE_ADDR/HI with the MQD base address */
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set CP_MQD_CONTROL.VMID=0 */
	data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, data);

	/* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* set CP_HQD_PQ_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* set CP_HQD_PQ_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* set CP_HQD_ACTIVE.ACTIVE=1 */
	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	nv_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
#endif

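/*
 * Map the MES scheduler ring through the KIQ: reserve space on the KIQ
 * ring, emit the per-ASIC map_queues packet for adev->mes.ring, and
 * confirm the mapping with a ring test.
 */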
static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	int r;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("KIQ enable failed\n");
		kiq_ring->sched.ready = false;
	}

	return r;
}

static int mes_v10_1_queue_init(struct amdgpu_device *adev)
{
	int r;

	r = mes_v10_1_mqd_init(&adev->mes.ring);
	if (r)
		return r;

	r = mes_v10_1_kiq_enable_queue(adev);
	if (r)
		return r;

	return 0;
}

static int mes_v10_1_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	ring = &adev->mes.ring;

	ring->funcs = &mes_v10_1_ring_funcs;

	ring->me = 3;
	ring->pipe = 0;
	ring->queue = 0;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;

	spin_lock_init(&adev->gfx.kiq.ring_lock);

	ring = &adev->gfx.kiq.ring;

	ring->me = 3;
	ring->pipe = 1;
	ring->queue = 0;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
	ring->no_scheduler = true;
	sprintf(ring->name, "mes_kiq_%d.%d.%d",
		ring->me, ring->pipe, ring->queue);

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

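/*
 * Allocate the GTT-backed MQD buffer for the given pipe's ring (KIQ or
 * scheduler) plus a CPU-side backup copy of the MQD contents.
 */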
static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
				 enum admgpu_mes_pipe pipe)
{
	int r, mqd_size = sizeof(struct v10_compute_mqd);
	struct amdgpu_ring *ring;

	if (pipe == AMDGPU_MES_KIQ_PIPE)
		ring = &adev->gfx.kiq.ring;
	else if (pipe == AMDGPU_MES_SCHED_PIPE)
		ring = &adev->mes.ring;
	else
		BUG();

	if (ring->mqd_obj)
		return 0;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
				    &ring->mqd_gpu_addr, &ring->mqd_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
		return r;
	}
	memset(ring->mqd_ptr, 0, mqd_size);

	/* prepare MQD backup */
	adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
	if (!adev->mes.mqd_backup[pipe])
		dev_warn(adev->dev,
			 "no memory to create MQD backup for ring %s\n",
			 ring->name);

	return 0;
}

static int mes_v10_1_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int pipe, r;

	adev->mes.adev = adev;
	adev->mes.funcs = &mes_v10_1_funcs;
	adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;

	r = amdgpu_mes_init(adev);
	if (r)
		return r;

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
			continue;

		r = mes_v10_1_init_microcode(adev, pipe);
		if (r)
			return r;

		r = mes_v10_1_allocate_eop_buf(adev, pipe);
		if (r)
			return r;

		r = mes_v10_1_mqd_sw_init(adev, pipe);
		if (r)
			return r;
	}

	if (adev->enable_mes_kiq) {
		r = mes_v10_1_kiq_ring_init(adev);
		if (r)
			return r;
	}

	r = mes_v10_1_ring_init(adev);
	if (r)
		return r;

	return 0;
}

static int mes_v10_1_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int pipe;

	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);

	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		kfree(adev->mes.mqd_backup[pipe]);

		amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
				      &adev->mes.eop_gpu_addr[pipe],
				      NULL);

		mes_v10_1_free_microcode(adev, pipe);
	}

	amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj,
			      &adev->gfx.kiq.ring.mqd_gpu_addr,
			      &adev->gfx.kiq.ring.mqd_ptr);

	amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
			      &adev->mes.ring.mqd_gpu_addr,
			      &adev->mes.ring.mqd_ptr);

	amdgpu_ring_fini(&adev->gfx.kiq.ring);
	amdgpu_ring_fini(&adev->mes.ring);

	amdgpu_mes_fini(adev);
	return 0;
}

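/*
 * Tell the RLC which queue is the KIQ by programming the ring's
 * me/pipe/queue into RLC_CP_SCHEDULERS (Sienna Cichlid parts use a
 * relocated register offset). The value is written twice, the second
 * time with bit 7 set, mirroring the gfx KIQ setup.
 */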
static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is the KIQ queue */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(10, 3, 0):
	case IP_VERSION(10, 3, 2):
	case IP_VERSION(10, 3, 1):
	case IP_VERSION(10, 3, 4):
		tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
		tmp &= 0xffffff00;
		tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
		tmp |= 0x80;
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
		break;
	default:
		tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
		tmp &= 0xffffff00;
		tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
		tmp |= 0x80;
		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
		break;
	}
}

static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = mes_v10_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
		if (r) {
			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
			return r;
		}

		r = mes_v10_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
		if (r) {
			DRM_ERROR("failed to load MES fw, r=%d\n", r);
			return r;
		}
	}

	mes_v10_1_enable(adev, true);

	mes_v10_1_kiq_setting(&adev->gfx.kiq.ring);

	r = mes_v10_1_queue_init(adev);
	if (r)
		goto failure;

	return r;

failure:
	mes_v10_1_hw_fini(adev);
	return r;
}

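/*
 * Main bring-up: when the KIQ pipe is not used, backdoor-load the
 * scheduler firmware (direct-load configs) and start the pipes here;
 * then map the MES ring, declare the hardware resources and query the
 * scheduler status as a sanity check. Once MES is running it owns the
 * KIQ ring, so the driver stops submitting to it directly.
 */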
static int mes_v10_1_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->enable_mes_kiq) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			r = mes_v10_1_load_microcode(adev,
					     AMDGPU_MES_SCHED_PIPE);
			if (r) {
				DRM_ERROR("failed to load MES fw, r=%d\n", r);
				return r;
			}
		}

		mes_v10_1_enable(adev, true);
	}

	r = mes_v10_1_queue_init(adev);
	if (r)
		goto failure;

	r = mes_v10_1_set_hw_resources(&adev->mes);
	if (r)
		goto failure;

	r = mes_v10_1_query_sched_status(&adev->mes);
	if (r) {
		DRM_ERROR("MES is busy\n");
		goto failure;
	}

	/*
	 * Disable KIQ ring usage from the driver once MES is enabled.
	 * MES uses the KIQ ring exclusively, so the driver cannot access
	 * the KIQ ring with MES enabled.
	 */
	adev->gfx.kiq.ring.sched.ready = false;

	return 0;

failure:
	mes_v10_1_hw_fini(adev);
	return r;
}

static int mes_v10_1_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	mes_v10_1_enable(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
		mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
	}

	return 0;
}

static int mes_v10_1_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_mes_suspend(adev);
	if (r)
		return r;

	return mes_v10_1_hw_fini(adev);
}

static int mes_v10_1_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = mes_v10_1_hw_init(adev);
	if (r)
		return r;

	return amdgpu_mes_resume(adev);
}

static int mes_v10_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_mes_self_test(adev);

	return 0;
}

static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
	.name = "mes_v10_1",
	.late_init = mes_v10_0_late_init,
	.sw_init = mes_v10_1_sw_init,
	.sw_fini = mes_v10_1_sw_fini,
	.hw_init = mes_v10_1_hw_init,
	.hw_fini = mes_v10_1_hw_fini,
	.suspend = mes_v10_1_suspend,
	.resume = mes_v10_1_resume,
};

const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_MES,
	.major = 10,
	.minor = 1,
	.rev = 0,
	.funcs = &mes_v10_1_ip_funcs,
};