cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vcn_v3_0.c (73157B)
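The listing below is drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c, the AMD VCN 3.0 video codec engine driver, as carried in this fork. A minimal way to fetch and view it locally (assuming the fork keeps the upstream amdgpu path, which is not verified here):

git clone https://git.sinitax.com/sinitax/cachepc-linux
less cachepc-linux/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c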


      1/*
      2 * Copyright 2019 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 */
     23
     24#include <linux/firmware.h>
     25#include "amdgpu.h"
     26#include "amdgpu_vcn.h"
     27#include "amdgpu_pm.h"
     28#include "amdgpu_cs.h"
     29#include "soc15.h"
     30#include "soc15d.h"
     31#include "vcn_v2_0.h"
     32#include "mmsch_v3_0.h"
     33#include "vcn_sw_ring.h"
     34
     35#include "vcn/vcn_3_0_0_offset.h"
     36#include "vcn/vcn_3_0_0_sh_mask.h"
     37#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
     38
     39#include <drm/drm_drv.h>
     40
     41#define VCN_VID_SOC_ADDRESS_2_0					0x1fa00
     42#define VCN1_VID_SOC_ADDRESS_3_0				0x48200
     43
     44#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27
     45#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f
     46#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10
     47#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11
     48#define mmUVD_NO_OP_INTERNAL_OFFSET				0x29
     49#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66
     50#define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d
     51
     52#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
     53#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
     54#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
     55#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c
     56
     57#define VCN_INSTANCES_SIENNA_CICHLID				2
     58#define DEC_SW_RING_ENABLED					FALSE
     59
     60#define RDECODE_MSG_CREATE					0x00000000
     61#define RDECODE_MESSAGE_CREATE					0x00000001
     62
     63static int amdgpu_ih_clientid_vcns[] = {
     64	SOC15_IH_CLIENTID_VCN,
     65	SOC15_IH_CLIENTID_VCN1
     66};
     67
     68static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
     69static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
     70static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
     71static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
     72static int vcn_v3_0_set_powergating_state(void *handle,
     73			enum amd_powergating_state state);
     74static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
     75			int inst_idx, struct dpg_pause_state *new_state);
     76
     77static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
     78static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
     79
     80/**
     81 * vcn_v3_0_early_init - set function pointers
     82 *
     83 * @handle: amdgpu_device pointer
     84 *
     85 * Set ring and irq function pointers
     86 */
     87static int vcn_v3_0_early_init(void *handle)
     88{
     89	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
     90
     91	if (amdgpu_sriov_vf(adev)) {
     92		adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
     93		adev->vcn.harvest_config = 0;
     94		adev->vcn.num_enc_rings = 1;
     95
     96	} else {
     97		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
     98						 AMDGPU_VCN_HARVEST_VCN1))
     99			/* both instances are harvested, disable the block */
    100			return -ENOENT;
    101
    102		if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 33))
    103			adev->vcn.num_enc_rings = 0;
    104		else
    105			adev->vcn.num_enc_rings = 2;
    106	}
    107
    108	vcn_v3_0_set_dec_ring_funcs(adev);
    109	vcn_v3_0_set_enc_ring_funcs(adev);
    110	vcn_v3_0_set_irq_funcs(adev);
    111
    112	return 0;
    113}
    114
    115/**
    116 * vcn_v3_0_sw_init - sw init for VCN block
    117 *
    118 * @handle: amdgpu_device pointer
    119 *
    120 * Load firmware and sw initialization
    121 */
    122static int vcn_v3_0_sw_init(void *handle)
    123{
    124	struct amdgpu_ring *ring;
    125	int i, j, r;
    126	int vcn_doorbell_index = 0;
    127	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    128
    129	r = amdgpu_vcn_sw_init(adev);
    130	if (r)
    131		return r;
    132
    133	amdgpu_vcn_setup_ucode(adev);
    134
    135	r = amdgpu_vcn_resume(adev);
    136	if (r)
    137		return r;
    138
    139	/*
    140	 * Note: doorbell assignment is fixed for SRIOV multiple VCN engines
    141	 * Formula:
    142	 *   vcn_db_base  = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
    143	 *   dec_ring_i   = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
    144	 *   enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
    145	 */
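	/*
	 * Worked example (hypothetical base value, assuming num_enc_rings == 2):
	 * with vcn_db_base = 32, instance 0 gets doorbells 32 (dec) and 33/34
	 * (enc 0/1), and instance 1 gets 35 (dec) and 36/37 (enc 0/1).
	 */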
    146	if (amdgpu_sriov_vf(adev)) {
    147		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
    148		/* get DWORD offset */
    149		vcn_doorbell_index = vcn_doorbell_index << 1;
    150	}
    151
    152	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
    153		volatile struct amdgpu_fw_shared *fw_shared;
    154
    155		if (adev->vcn.harvest_config & (1 << i))
    156			continue;
    157
    158		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
    159		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
    160		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
    161		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
    162		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
    163		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
    164
    165		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
    166		adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
    167		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
    168		adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
    169		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
    170		adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
    171		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
    172		adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
    173		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
    174		adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
    175
    176		/* VCN DEC TRAP */
    177		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
    178				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq);
    179		if (r)
    180			return r;
    181
    182		atomic_set(&adev->vcn.inst[i].sched_score, 0);
    183
    184		ring = &adev->vcn.inst[i].ring_dec;
    185		ring->use_doorbell = true;
    186		if (amdgpu_sriov_vf(adev)) {
    187			ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
    188		} else {
    189			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
    190		}
    191		sprintf(ring->name, "vcn_dec_%d", i);
    192		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
    193				     AMDGPU_RING_PRIO_DEFAULT,
    194				     &adev->vcn.inst[i].sched_score);
    195		if (r)
    196			return r;
    197
    198		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
    199			enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
    200
    201			/* VCN ENC TRAP */
    202			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
    203				j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
    204			if (r)
    205				return r;
    206
    207			ring = &adev->vcn.inst[i].ring_enc[j];
    208			ring->use_doorbell = true;
    209			if (amdgpu_sriov_vf(adev)) {
    210				ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
    211			} else {
    212				ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
    213			}
    214			sprintf(ring->name, "vcn_enc_%d.%d", i, j);
    215			r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
    216					     hw_prio, &adev->vcn.inst[i].sched_score);
    217			if (r)
    218				return r;
    219		}
    220
    221		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
    222		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
    223					     cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
    224					     cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
    225		fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
    226		fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG;
    227		if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2))
    228			fw_shared->smu_interface_info.smu_interface_type = 2;
    229		else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1))
    230			fw_shared->smu_interface_info.smu_interface_type = 1;
    231
    232		if (amdgpu_vcnfw_log)
    233			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
    234	}
    235
    236	if (amdgpu_sriov_vf(adev)) {
    237		r = amdgpu_virt_alloc_mm_table(adev);
    238		if (r)
    239			return r;
    240	}
    241	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
    242		adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
    243
    244	return 0;
    245}
    246
    247/**
    248 * vcn_v3_0_sw_fini - sw fini for VCN block
    249 *
    250 * @handle: amdgpu_device pointer
    251 *
    252 * VCN suspend and free up sw allocation
    253 */
    254static int vcn_v3_0_sw_fini(void *handle)
    255{
    256	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    257	int i, r, idx;
    258
    259	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
    260		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
    261			volatile struct amdgpu_fw_shared *fw_shared;
    262
    263			if (adev->vcn.harvest_config & (1 << i))
    264				continue;
    265			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
    266			fw_shared->present_flag_0 = 0;
    267			fw_shared->sw_ring.is_enabled = false;
    268		}
    269
    270		drm_dev_exit(idx);
    271	}
    272
    273	if (amdgpu_sriov_vf(adev))
    274		amdgpu_virt_free_mm_table(adev);
    275
    276	r = amdgpu_vcn_suspend(adev);
    277	if (r)
    278		return r;
    279
    280	r = amdgpu_vcn_sw_fini(adev);
    281
    282	return r;
    283}
    284
    285/**
    286 * vcn_v3_0_hw_init - start and test VCN block
    287 *
    288 * @handle: amdgpu_device pointer
    289 *
    290 * Initialize the hardware, boot up the VCPU and do some testing
    291 */
    292static int vcn_v3_0_hw_init(void *handle)
    293{
    294	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    295	struct amdgpu_ring *ring;
    296	int i, j, r;
    297
    298	if (amdgpu_sriov_vf(adev)) {
    299		r = vcn_v3_0_start_sriov(adev);
    300		if (r)
    301			goto done;
    302
    303		/* initialize VCN dec and enc ring buffers */
    304		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
    305			if (adev->vcn.harvest_config & (1 << i))
    306				continue;
    307
    308			ring = &adev->vcn.inst[i].ring_dec;
    309			if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) {
    310				ring->sched.ready = false;
    311				ring->no_scheduler = true;
    312				dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
    313			} else {
    314				ring->wptr = 0;
    315				ring->wptr_old = 0;
    316				vcn_v3_0_dec_ring_set_wptr(ring);
    317				ring->sched.ready = true;
    318			}
    319
    320			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
    321				ring = &adev->vcn.inst[i].ring_enc[j];
    322				if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
    323					ring->sched.ready = false;
    324					ring->no_scheduler = true;
    325					dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
    326				} else {
    327					ring->wptr = 0;
    328					ring->wptr_old = 0;
    329					vcn_v3_0_enc_ring_set_wptr(ring);
    330					ring->sched.ready = true;
    331				}
    332			}
    333		}
    334	} else {
    335		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
    336			if (adev->vcn.harvest_config & (1 << i))
    337				continue;
    338
    339			ring = &adev->vcn.inst[i].ring_dec;
    340
    341			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
    342						     ring->doorbell_index, i);
    343
    344			r = amdgpu_ring_test_helper(ring);
    345			if (r)
    346				goto done;
    347
    348			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
    349				ring = &adev->vcn.inst[i].ring_enc[j];
    350				r = amdgpu_ring_test_helper(ring);
    351				if (r)
    352					goto done;
    353			}
    354		}
    355	}
    356
    357done:
    358	if (!r)
     359		DRM_INFO("VCN decode and encode initialized successfully (under %s).\n",
     360			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");
    361
    362	return r;
    363}
    364
    365/**
    366 * vcn_v3_0_hw_fini - stop the hardware block
    367 *
    368 * @handle: amdgpu_device pointer
    369 *
    370 * Stop the VCN block, mark ring as not ready any more
    371 */
    372static int vcn_v3_0_hw_fini(void *handle)
    373{
    374	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    375	int i;
    376
    377	cancel_delayed_work_sync(&adev->vcn.idle_work);
    378
    379	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
    380		if (adev->vcn.harvest_config & (1 << i))
    381			continue;
    382
    383		if (!amdgpu_sriov_vf(adev)) {
    384			if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
    385					(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
    386					 RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
    387				vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
    388			}
    389		}
    390	}
    391
    392	return 0;
    393}
    394
    395/**
    396 * vcn_v3_0_suspend - suspend VCN block
    397 *
    398 * @handle: amdgpu_device pointer
    399 *
    400 * HW fini and suspend VCN block
    401 */
    402static int vcn_v3_0_suspend(void *handle)
    403{
    404	int r;
    405	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    406
    407	r = vcn_v3_0_hw_fini(adev);
    408	if (r)
    409		return r;
    410
    411	r = amdgpu_vcn_suspend(adev);
    412
    413	return r;
    414}
    415
    416/**
    417 * vcn_v3_0_resume - resume VCN block
    418 *
    419 * @handle: amdgpu_device pointer
    420 *
    421 * Resume firmware and hw init VCN block
    422 */
    423static int vcn_v3_0_resume(void *handle)
    424{
    425	int r;
    426	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    427
    428	r = amdgpu_vcn_resume(adev);
    429	if (r)
    430		return r;
    431
    432	r = vcn_v3_0_hw_init(adev);
    433
    434	return r;
    435}
    436
    437/**
    438 * vcn_v3_0_mc_resume - memory controller programming
    439 *
    440 * @adev: amdgpu_device pointer
    441 * @inst: instance number
    442 *
     443 * Let the VCN memory controller know its offsets
    444 */
    445static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
    446{
    447	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
    448	uint32_t offset;
    449
    450	/* cache window 0: fw */
    451	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    452		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
    453			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
    454		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
    455			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
    456		WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0);
    457		offset = 0;
    458	} else {
    459		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
    460			lower_32_bits(adev->vcn.inst[inst].gpu_addr));
    461		WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
    462			upper_32_bits(adev->vcn.inst[inst].gpu_addr));
    463		offset = size;
    464		WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0,
    465			AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
    466	}
    467	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size);
    468
    469	/* cache window 1: stack */
    470	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
    471		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
    472	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
    473		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
    474	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0);
    475	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
    476
    477	/* cache window 2: context */
    478	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
    479		lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
    480	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
    481		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
    482	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
    483	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
    484
    485	/* non-cache window */
    486	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
    487		lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
    488	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
    489		upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
    490	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
    491	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
    492		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
    493}
    494
    495static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
    496{
    497	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
    498	uint32_t offset;
    499
    500	/* cache window 0: fw */
    501	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
    502		if (!indirect) {
    503			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    504				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
    505				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
    506			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    507				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
    508				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
    509			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    510				VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
    511		} else {
    512			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    513				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
    514			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    515				VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
    516			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    517				VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
    518		}
    519		offset = 0;
    520	} else {
    521		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    522			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
    523			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
    524		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    525			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
    526			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
    527		offset = size;
    528		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    529			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
    530			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
    531	}
    532
    533	if (!indirect)
    534		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    535			VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
    536	else
    537		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    538			VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
    539
    540	/* cache window 1: stack */
    541	if (!indirect) {
    542		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    543			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
    544			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
    545		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    546			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
    547			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
    548		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    549			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
    550	} else {
    551		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    552			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
    553		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    554			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
    555		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    556			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
    557	}
    558	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    559			VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
    560
    561	/* cache window 2: context */
    562	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    563			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
    564			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
    565	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    566			VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
    567			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
    568	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    569			VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
    570	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    571			VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
    572
    573	/* non-cache window */
    574	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    575			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
    576			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
    577	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    578			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
    579			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
    580	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    581			VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
    582	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    583			VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
    584			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
    585
    586	/* VCN global tiling registers */
    587	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    588		UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
    589}
    590
    591static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
    592{
    593	uint32_t data = 0;
    594
    595	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
    596		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
    597			| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
    598			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
    599			| 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
    600			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
    601			| 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
    602			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
    603			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
    604			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
    605			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
    606			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
    607			| 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
    608			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
    609			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
    610
    611		WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
    612		SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS,
    613			UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
    614	} else {
    615		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
    616			| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
    617			| 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
    618			| 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
    619			| 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
    620			| 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
    621			| 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
    622			| 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
    623			| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
    624			| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
    625			| 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
    626			| 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
    627			| 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
    628			| 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
    629		WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
    630		SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0,  0x3F3FFFFF);
    631	}
    632
    633	data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
    634	data &= ~0x103;
    635	if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
    636		data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
    637			UVD_POWER_STATUS__UVD_PG_EN_MASK;
    638
    639	WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
    640}
    641
    642static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
    643{
    644	uint32_t data;
    645
    646	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
    647		/* Before power off, this indicator has to be turned on */
    648		data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
    649		data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
    650		data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
    651		WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
    652
    653		data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
    654			| 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
    655			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
    656			| 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
    657			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
    658			| 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
    659			| 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
    660			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
    661			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
    662			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
    663			| 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
    664			| 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
    665			| 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
    666			| 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
    667		WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
    668
    669		data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
    670			| 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
    671			| 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
    672			| 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
    673			| 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
    674			| 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT
    675			| 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
    676			| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
    677			| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
    678			| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
    679			| 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
    680			| 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT
    681			| 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
    682			| 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
    683		SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
    684	}
    685}
    686
    687/**
    688 * vcn_v3_0_disable_clock_gating - disable VCN clock gating
    689 *
    690 * @adev: amdgpu_device pointer
    691 * @inst: instance number
    692 *
    693 * Disable clock gating for VCN block
    694 */
    695static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
    696{
    697	uint32_t data;
    698
    699	/* VCN disable CGC */
    700	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
    701	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
    702		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
    703	else
    704		data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
    705	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
    706	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
    707	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
    708
    709	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE);
    710	data &= ~(UVD_CGC_GATE__SYS_MASK
    711		| UVD_CGC_GATE__UDEC_MASK
    712		| UVD_CGC_GATE__MPEG2_MASK
    713		| UVD_CGC_GATE__REGS_MASK
    714		| UVD_CGC_GATE__RBC_MASK
    715		| UVD_CGC_GATE__LMI_MC_MASK
    716		| UVD_CGC_GATE__LMI_UMC_MASK
    717		| UVD_CGC_GATE__IDCT_MASK
    718		| UVD_CGC_GATE__MPRD_MASK
    719		| UVD_CGC_GATE__MPC_MASK
    720		| UVD_CGC_GATE__LBSI_MASK
    721		| UVD_CGC_GATE__LRBBM_MASK
    722		| UVD_CGC_GATE__UDEC_RE_MASK
    723		| UVD_CGC_GATE__UDEC_CM_MASK
    724		| UVD_CGC_GATE__UDEC_IT_MASK
    725		| UVD_CGC_GATE__UDEC_DB_MASK
    726		| UVD_CGC_GATE__UDEC_MP_MASK
    727		| UVD_CGC_GATE__WCB_MASK
    728		| UVD_CGC_GATE__VCPU_MASK
    729		| UVD_CGC_GATE__MMSCH_MASK);
    730
    731	WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data);
    732
    733	SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0,  0xFFFFFFFF);
    734
    735	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
    736	data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
    737		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
    738		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
    739		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
    740		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
    741		| UVD_CGC_CTRL__SYS_MODE_MASK
    742		| UVD_CGC_CTRL__UDEC_MODE_MASK
    743		| UVD_CGC_CTRL__MPEG2_MODE_MASK
    744		| UVD_CGC_CTRL__REGS_MODE_MASK
    745		| UVD_CGC_CTRL__RBC_MODE_MASK
    746		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
    747		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
    748		| UVD_CGC_CTRL__IDCT_MODE_MASK
    749		| UVD_CGC_CTRL__MPRD_MODE_MASK
    750		| UVD_CGC_CTRL__MPC_MODE_MASK
    751		| UVD_CGC_CTRL__LBSI_MODE_MASK
    752		| UVD_CGC_CTRL__LRBBM_MODE_MASK
    753		| UVD_CGC_CTRL__WCB_MODE_MASK
    754		| UVD_CGC_CTRL__VCPU_MODE_MASK
    755		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
    756	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
    757
    758	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE);
    759	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
    760		| UVD_SUVD_CGC_GATE__SIT_MASK
    761		| UVD_SUVD_CGC_GATE__SMP_MASK
    762		| UVD_SUVD_CGC_GATE__SCM_MASK
    763		| UVD_SUVD_CGC_GATE__SDB_MASK
    764		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
    765		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
    766		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
    767		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
    768		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
    769		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
    770		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
    771		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
    772		| UVD_SUVD_CGC_GATE__SCLR_MASK
    773		| UVD_SUVD_CGC_GATE__ENT_MASK
    774		| UVD_SUVD_CGC_GATE__IME_MASK
    775		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
    776		| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
    777		| UVD_SUVD_CGC_GATE__SITE_MASK
    778		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
    779		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
    780		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
    781		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
    782		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK
    783		| UVD_SUVD_CGC_GATE__EFC_MASK
    784		| UVD_SUVD_CGC_GATE__SAOE_MASK
    785		| UVD_SUVD_CGC_GATE__SRE_AV1_MASK
    786		| UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
    787		| UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
    788		| UVD_SUVD_CGC_GATE__SCM_AV1_MASK
    789		| UVD_SUVD_CGC_GATE__SMPA_MASK);
    790	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);
    791
    792	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
    793	data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
    794		| UVD_SUVD_CGC_GATE2__MPBE1_MASK
    795		| UVD_SUVD_CGC_GATE2__SIT_AV1_MASK
    796		| UVD_SUVD_CGC_GATE2__SDB_AV1_MASK
    797		| UVD_SUVD_CGC_GATE2__MPC1_MASK);
    798	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);
    799
    800	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
    801	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
    802		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
    803		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
    804		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
    805		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
    806		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
    807		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
    808		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
    809		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
    810		| UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
    811		| UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
    812		| UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
    813		| UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
    814		| UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
    815		| UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
    816		| UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
    817		| UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
    818		| UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
    819		| UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
    820	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
    821}
    822
    823static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
    824		uint8_t sram_sel, int inst_idx, uint8_t indirect)
    825{
    826	uint32_t reg_data = 0;
    827
    828	/* enable sw clock gating control */
    829	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
    830		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
    831	else
    832		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
    833	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
    834	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
    835	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
    836		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
    837		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
    838		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
    839		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
    840		 UVD_CGC_CTRL__SYS_MODE_MASK |
    841		 UVD_CGC_CTRL__UDEC_MODE_MASK |
    842		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
    843		 UVD_CGC_CTRL__REGS_MODE_MASK |
    844		 UVD_CGC_CTRL__RBC_MODE_MASK |
    845		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
    846		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
    847		 UVD_CGC_CTRL__IDCT_MODE_MASK |
    848		 UVD_CGC_CTRL__MPRD_MODE_MASK |
    849		 UVD_CGC_CTRL__MPC_MODE_MASK |
    850		 UVD_CGC_CTRL__LBSI_MODE_MASK |
    851		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
    852		 UVD_CGC_CTRL__WCB_MODE_MASK |
    853		 UVD_CGC_CTRL__VCPU_MODE_MASK |
    854		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
    855	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    856		VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
    857
    858	/* turn off clock gating */
    859	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    860		VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
    861
    862	/* turn on SUVD clock gating */
    863	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    864		VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
    865
    866	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
    867	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    868		VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
    869}
    870
    871/**
    872 * vcn_v3_0_enable_clock_gating - enable VCN clock gating
    873 *
    874 * @adev: amdgpu_device pointer
    875 * @inst: instance number
    876 *
    877 * Enable clock gating for VCN block
    878 */
    879static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
    880{
    881	uint32_t data;
    882
    883	/* enable VCN CGC */
    884	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
    885	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
    886		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
    887	else
    888		data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
    889	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
    890	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
    891	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
    892
    893	data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
    894	data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
    895		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
    896		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
    897		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
    898		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
    899		| UVD_CGC_CTRL__SYS_MODE_MASK
    900		| UVD_CGC_CTRL__UDEC_MODE_MASK
    901		| UVD_CGC_CTRL__MPEG2_MODE_MASK
    902		| UVD_CGC_CTRL__REGS_MODE_MASK
    903		| UVD_CGC_CTRL__RBC_MODE_MASK
    904		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
    905		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
    906		| UVD_CGC_CTRL__IDCT_MODE_MASK
    907		| UVD_CGC_CTRL__MPRD_MODE_MASK
    908		| UVD_CGC_CTRL__MPC_MODE_MASK
    909		| UVD_CGC_CTRL__LBSI_MODE_MASK
    910		| UVD_CGC_CTRL__LRBBM_MODE_MASK
    911		| UVD_CGC_CTRL__WCB_MODE_MASK
    912		| UVD_CGC_CTRL__VCPU_MODE_MASK
    913		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
    914	WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
    915
    916	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
    917	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
    918		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
    919		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
    920		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
    921		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
    922		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
    923		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
    924		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
    925		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
    926		| UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
    927		| UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
    928		| UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
    929		| UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
    930		| UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
    931		| UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
    932		| UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
    933		| UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
    934		| UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
    935		| UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
    936	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
    937}
    938
    939static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
    940{
    941	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
    942	struct amdgpu_ring *ring;
    943	uint32_t rb_bufsz, tmp;
    944
    945	/* disable register anti-hang mechanism */
    946	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
    947		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
    948	/* enable dynamic power gating mode */
    949	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
    950	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
    951	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
    952	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
    953
    954	if (indirect)
    955		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
    956
    957	/* enable clock gating */
    958	vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
    959
    960	/* enable VCPU clock */
    961	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
    962	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
    963	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
    964	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    965		VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
    966
     967	/* disable master interrupt */
    968	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    969		VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
    970
    971	/* setup mmUVD_LMI_CTRL */
    972	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
    973		UVD_LMI_CTRL__REQ_MODE_MASK |
    974		UVD_LMI_CTRL__CRC_RESET_MASK |
    975		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
    976		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
    977		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
    978		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
    979		0x00100000L);
    980	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    981		VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
    982
    983	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    984		VCN, inst_idx, mmUVD_MPC_CNTL),
    985		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
    986
    987	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    988		VCN, inst_idx, mmUVD_MPC_SET_MUXA0),
    989		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
    990		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
    991		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
    992		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
    993
    994	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
    995		VCN, inst_idx, mmUVD_MPC_SET_MUXB0),
    996		 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
    997		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
    998		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
    999		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
   1000
   1001	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1002		VCN, inst_idx, mmUVD_MPC_SET_MUX),
   1003		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
   1004		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
   1005		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
   1006
   1007	vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
   1008
   1009	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1010		VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
   1011	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1012		VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
   1013
   1014	/* enable LMI MC and UMC channels */
   1015	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1016		VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
   1017
   1018	/* unblock VCPU register access */
   1019	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1020		VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
   1021
   1022	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
   1023	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
   1024	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1025		VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
   1026
   1027	/* enable master interrupt */
   1028	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1029		VCN, inst_idx, mmUVD_MASTINT_EN),
   1030		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
   1031
    1032	/* add nop to work around PSP size check */
   1033	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
   1034		VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
   1035
   1036	if (indirect)
   1037		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
   1038			(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
   1039				(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
   1040
   1041	ring = &adev->vcn.inst[inst_idx].ring_dec;
   1042	/* force RBC into idle state */
   1043	rb_bufsz = order_base_2(ring->ring_size);
   1044	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
   1045	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
   1046	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
   1047	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
   1048	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
   1049	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
   1050
   1051	/* Stall DPG before WPTR/RPTR reset */
   1052	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
   1053		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
   1054		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
   1055	fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
   1056
   1057	/* set the write pointer delay */
   1058	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
   1059
   1060	/* set the wb address */
   1061	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
   1062		(upper_32_bits(ring->gpu_addr) >> 2));
   1063
    1064	/* program the RB_BASE for ring buffer */
   1065	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
   1066		lower_32_bits(ring->gpu_addr));
   1067	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
   1068		upper_32_bits(ring->gpu_addr));
   1069
   1070	/* Initialize the ring buffer's read and write pointers */
   1071	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
   1072
   1073	WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
   1074
   1075	ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
   1076	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
   1077		lower_32_bits(ring->wptr));
   1078
   1079	/* Reset FW shared memory RBC WPTR/RPTR */
   1080	fw_shared->rb.rptr = 0;
   1081	fw_shared->rb.wptr = lower_32_bits(ring->wptr);
   1082
    1083	/* resetting done, fw can check RB ring */
   1084	fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
   1085
   1086	/* Unstall DPG */
   1087	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
   1088		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
   1089
   1090	return 0;
   1091}
   1092
   1093static int vcn_v3_0_start(struct amdgpu_device *adev)
   1094{
   1095	volatile struct amdgpu_fw_shared *fw_shared;
   1096	struct amdgpu_ring *ring;
   1097	uint32_t rb_bufsz, tmp;
   1098	int i, j, k, r;
   1099
   1100	if (adev->pm.dpm_enabled)
   1101		amdgpu_dpm_enable_uvd(adev, true);
   1102
   1103	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   1104		if (adev->vcn.harvest_config & (1 << i))
   1105			continue;
   1106
   1107		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
   1108			r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
   1109			continue;
   1110		}
   1111
   1112		/* disable VCN power gating */
   1113		vcn_v3_0_disable_static_power_gating(adev, i);
   1114
   1115		/* set VCN status busy */
   1116		tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
   1117		WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
   1118
    1119		/* disable SW clock gating */
   1120		vcn_v3_0_disable_clock_gating(adev, i);
   1121
   1122		/* enable VCPU clock */
   1123		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
   1124			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
   1125
   1126		/* disable master interrupt */
   1127		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
   1128			~UVD_MASTINT_EN__VCPU_EN_MASK);
   1129
   1130		/* enable LMI MC and UMC channels */
   1131		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
   1132			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
   1133
   1134		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
   1135		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
   1136		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
   1137		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
   1138
   1139		/* setup mmUVD_LMI_CTRL */
   1140		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
   1141		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
   1142			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK	|
   1143			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
   1144			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
   1145			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
   1146
   1147		/* setup mmUVD_MPC_CNTL */
   1148		tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
   1149		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
   1150		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
   1151		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
   1152
   1153		/* setup UVD_MPC_SET_MUXA0 */
   1154		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
   1155			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
   1156			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
   1157			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
   1158			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
   1159
   1160		/* setup UVD_MPC_SET_MUXB0 */
   1161		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
   1162			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
   1163			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
   1164			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
   1165			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
   1166
   1167		/* setup mmUVD_MPC_SET_MUX */
   1168		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
   1169			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
   1170			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
   1171			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
   1172
   1173		vcn_v3_0_mc_resume(adev, i);
   1174
   1175		/* VCN global tiling registers */
   1176		WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
   1177			adev->gfx.config.gb_addr_config);
   1178
   1179		/* unblock VCPU register access */
   1180		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
   1181			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
   1182
   1183		/* release VCPU reset to boot */
   1184		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
   1185			~UVD_VCPU_CNTL__BLK_RST_MASK);
   1186
   1187		for (j = 0; j < 10; ++j) {
   1188			uint32_t status;
   1189
   1190			for (k = 0; k < 100; ++k) {
   1191				status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
   1192				if (status & 2)
   1193					break;
   1194				mdelay(10);
   1195			}
   1196			r = 0;
   1197			if (status & 2)
   1198				break;
   1199
   1200			DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
   1201			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
   1202				UVD_VCPU_CNTL__BLK_RST_MASK,
   1203				~UVD_VCPU_CNTL__BLK_RST_MASK);
   1204			mdelay(10);
   1205			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
   1206				~UVD_VCPU_CNTL__BLK_RST_MASK);
   1207
   1208			mdelay(10);
   1209			r = -1;
   1210		}
   1211
   1212		if (r) {
   1213			DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
   1214			return r;
   1215		}
   1216
   1217		/* enable master interrupt */
   1218		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
   1219			UVD_MASTINT_EN__VCPU_EN_MASK,
   1220			~UVD_MASTINT_EN__VCPU_EN_MASK);
   1221
   1222		/* clear the busy bit of VCN_STATUS */
   1223		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
   1224			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
   1225
   1226		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
   1227
   1228		ring = &adev->vcn.inst[i].ring_dec;
   1229		/* force RBC into idle state */
   1230		rb_bufsz = order_base_2(ring->ring_size);
   1231		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
   1232		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
   1233		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
   1234		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
   1235		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
   1236		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
   1237
   1238		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
   1239		fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
   1240
    1241		/* program the RB_BASE for ring buffer */
   1242		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
   1243			lower_32_bits(ring->gpu_addr));
   1244		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
   1245			upper_32_bits(ring->gpu_addr));
   1246
   1247		/* Initialize the ring buffer's read and write pointers */
   1248		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
   1249
   1250		WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
   1251		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
   1252		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
   1253			lower_32_bits(ring->wptr));
   1254		fw_shared->rb.wptr = lower_32_bits(ring->wptr);
   1255		fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
   1256
   1257		if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
   1258			fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
   1259			ring = &adev->vcn.inst[i].ring_enc[0];
   1260			WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
   1261			WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
   1262			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
   1263			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
   1264			WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
   1265			fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
   1266
   1267			fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
   1268			ring = &adev->vcn.inst[i].ring_enc[1];
   1269			WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
   1270			WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
   1271			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
   1272			WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
   1273			WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
   1274			fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
   1275		}
   1276	}
   1277
   1278	return 0;
   1279}
   1280
   1281static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
   1282{
   1283	int i, j;
   1284	struct amdgpu_ring *ring;
   1285	uint64_t cache_addr;
   1286	uint64_t rb_addr;
   1287	uint64_t ctx_addr;
   1288	uint32_t param, resp, expected;
   1289	uint32_t offset, cache_size;
   1290	uint32_t tmp, timeout;
   1291
   1292	struct amdgpu_mm_table *table = &adev->virt.mm_table;
   1293	uint32_t *table_loc;
   1294	uint32_t table_size;
   1295	uint32_t size, size_dw;
   1296
   1297	struct mmsch_v3_0_cmd_direct_write
   1298		direct_wt = { {0} };
   1299	struct mmsch_v3_0_cmd_direct_read_modify_write
   1300		direct_rd_mod_wt = { {0} };
   1301	struct mmsch_v3_0_cmd_end end = { {0} };
   1302	struct mmsch_v3_0_init_header header;
   1303
   1304	direct_wt.cmd_header.command_type =
   1305		MMSCH_COMMAND__DIRECT_REG_WRITE;
   1306	direct_rd_mod_wt.cmd_header.command_type =
   1307		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
   1308	end.cmd_header.command_type =
   1309		MMSCH_COMMAND__END;
   1310
   1311	header.version = MMSCH_VERSION;
   1312	header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
   1313	for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
   1314		header.inst[i].init_status = 0;
   1315		header.inst[i].table_offset = 0;
   1316		header.inst[i].table_size = 0;
   1317	}
   1318
   1319	table_loc = (uint32_t *)table->cpu_addr;
   1320	table_loc += header.total_size;
   1321	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
   1322		if (adev->vcn.harvest_config & (1 << i))
   1323			continue;
   1324
   1325		table_size = 0;
   1326
   1327		MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
   1328			mmUVD_STATUS),
   1329			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
   1330
   1331		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
   1332
   1333		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
   1334			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1335				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
   1336				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
   1337			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1338				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
   1339				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
   1340			offset = 0;
   1341			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1342				mmUVD_VCPU_CACHE_OFFSET0),
   1343				0);
   1344		} else {
   1345			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1346				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
   1347				lower_32_bits(adev->vcn.inst[i].gpu_addr));
   1348			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1349				mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
   1350				upper_32_bits(adev->vcn.inst[i].gpu_addr));
   1351			offset = cache_size;
   1352			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1353				mmUVD_VCPU_CACHE_OFFSET0),
   1354				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
   1355		}
   1356
   1357		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1358			mmUVD_VCPU_CACHE_SIZE0),
   1359			cache_size);
   1360
   1361		cache_addr = adev->vcn.inst[i].gpu_addr + offset;
   1362		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1363			mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
   1364			lower_32_bits(cache_addr));
   1365		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1366			mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
   1367			upper_32_bits(cache_addr));
   1368		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1369			mmUVD_VCPU_CACHE_OFFSET1),
   1370			0);
   1371		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1372			mmUVD_VCPU_CACHE_SIZE1),
   1373			AMDGPU_VCN_STACK_SIZE);
   1374
   1375		cache_addr = adev->vcn.inst[i].gpu_addr + offset +
   1376			AMDGPU_VCN_STACK_SIZE;
   1377		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1378			mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
   1379			lower_32_bits(cache_addr));
   1380		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1381			mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
   1382			upper_32_bits(cache_addr));
   1383		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1384			mmUVD_VCPU_CACHE_OFFSET2),
   1385			0);
   1386		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1387			mmUVD_VCPU_CACHE_SIZE2),
   1388			AMDGPU_VCN_CONTEXT_SIZE);
   1389
   1390		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
   1391			ring = &adev->vcn.inst[i].ring_enc[j];
   1392			ring->wptr = 0;
   1393			rb_addr = ring->gpu_addr;
   1394			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1395				mmUVD_RB_BASE_LO),
   1396				lower_32_bits(rb_addr));
   1397			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1398				mmUVD_RB_BASE_HI),
   1399				upper_32_bits(rb_addr));
   1400			MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1401				mmUVD_RB_SIZE),
   1402				ring->ring_size / 4);
   1403		}
   1404
   1405		ring = &adev->vcn.inst[i].ring_dec;
   1406		ring->wptr = 0;
   1407		rb_addr = ring->gpu_addr;
   1408		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1409			mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
   1410			lower_32_bits(rb_addr));
   1411		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1412			mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
   1413			upper_32_bits(rb_addr));
   1414		/* force RBC into idle state */
   1415		tmp = order_base_2(ring->ring_size);
   1416		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
   1417		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
   1418		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
   1419		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
   1420		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
   1421		MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
   1422			mmUVD_RBC_RB_CNTL),
   1423			tmp);
   1424
   1425		/* add end packet */
   1426		MMSCH_V3_0_INSERT_END();
   1427
   1428		/* refine header */
   1429		header.inst[i].init_status = 0;
   1430		header.inst[i].table_offset = header.total_size;
   1431		header.inst[i].table_size = table_size;
   1432		header.total_size += table_size;
   1433	}
   1434
   1435	/* Update init table header in memory */
   1436	size = sizeof(struct mmsch_v3_0_init_header);
   1437	table_loc = (uint32_t *)table->cpu_addr;
   1438	memcpy((void *)table_loc, &header, size);
   1439
    1440	/* message MMSCH (in VCN[0]) to initialize this client
    1441	 * 1, write the GPU MC address of the memory descriptor
    1442	 * location to the mmsch_vf_ctx_addr_lo/hi registers
    1443	 */
   1444	ctx_addr = table->gpu_addr;
   1445	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
   1446	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
   1447
   1448	/* 2, update vmid of descriptor */
   1449	tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
   1450	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
   1451	/* use domain0 for MM scheduler */
   1452	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
   1453	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
   1454
   1455	/* 3, notify mmsch about the size of this descriptor */
   1456	size = header.total_size;
   1457	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
   1458
   1459	/* 4, set resp to zero */
   1460	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
   1461
   1462	/* 5, kick off the initialization and wait until
   1463	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
   1464	 */
   1465	param = 0x10000001;
   1466	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
   1467	tmp = 0;
   1468	timeout = 1000;
   1469	resp = 0;
   1470	expected = param + 1;
   1471	while (resp != expected) {
   1472		resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
   1473		if (resp == expected)
   1474			break;
   1475
   1476		udelay(10);
   1477		tmp = tmp + 10;
   1478		if (tmp >= timeout) {
    1479			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"
    1480				" waiting for mmMMSCH_VF_MAILBOX_RESP "
    1481				"(expected=0x%08x, readback=0x%08x)\n",
    1482				tmp, expected, resp);
   1483			return -EBUSY;
   1484		}
   1485	}
   1486
   1487	return 0;
   1488}
   1489
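        /**
         * vcn_v3_0_stop_dpg_mode - stop a VCN instance running in DPG mode
         *
         * @adev: amdgpu_device pointer
         * @inst_idx: instance to stop
         *
         * Unpause DPG, wait for the power status and the ring read pointers to
         * settle, then clear the dynamic power gating mode bit.
         */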
   1490static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
   1491{
   1492	struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
   1493	uint32_t tmp;
   1494
   1495	vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state);
   1496
   1497	/* Wait for power status to be 1 */
   1498	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
   1499		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
   1500
   1501	/* wait for read ptr to be equal to write ptr */
   1502	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
   1503	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
   1504
   1505	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
   1506	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
   1507
   1508	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
   1509	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
   1510
   1511	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
   1512		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
   1513
   1514	/* disable dynamic power gating mode */
   1515	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
   1516		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
   1517
   1518	return 0;
   1519}
   1520
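        /**
         * vcn_v3_0_stop - stop VCN block
         *
         * @adev: amdgpu_device pointer
         *
         * Stop every unharvested instance: leave DPG mode where it is used,
         * otherwise wait for idle, stall the LMI/UMC channel, reset and
         * clock-gate the VCPU, then re-enable clock and static power gating.
         */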
   1521static int vcn_v3_0_stop(struct amdgpu_device *adev)
   1522{
   1523	uint32_t tmp;
   1524	int i, r = 0;
   1525
   1526	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   1527		if (adev->vcn.harvest_config & (1 << i))
   1528			continue;
   1529
   1530		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
   1531			r = vcn_v3_0_stop_dpg_mode(adev, i);
   1532			continue;
   1533		}
   1534
   1535		/* wait for vcn idle */
   1536		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
   1537		if (r)
   1538			return r;
   1539
   1540		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
   1541			UVD_LMI_STATUS__READ_CLEAN_MASK |
   1542			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
   1543			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
   1544		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
   1545		if (r)
   1546			return r;
   1547
   1548		/* disable LMI UMC channel */
   1549		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
   1550		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
   1551		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
   1552		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
   1553			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
   1554		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
   1555		if (r)
   1556			return r;
   1557
   1558		/* block VCPU register access */
   1559		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
   1560			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
   1561			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
   1562
   1563		/* reset VCPU */
   1564		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
   1565			UVD_VCPU_CNTL__BLK_RST_MASK,
   1566			~UVD_VCPU_CNTL__BLK_RST_MASK);
   1567
   1568		/* disable VCPU clock */
   1569		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
   1570			~(UVD_VCPU_CNTL__CLK_EN_MASK));
   1571
   1572		/* apply soft reset */
   1573		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
   1574		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
   1575		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
   1576		tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
   1577		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
   1578		WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
   1579
   1580		/* clear status */
   1581		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
   1582
   1583		/* apply HW clock gating */
   1584		vcn_v3_0_enable_clock_gating(adev, i);
   1585
   1586		/* enable VCN power gating */
   1587		vcn_v3_0_enable_static_power_gating(adev, i);
   1588	}
   1589
   1590	if (adev->pm.dpm_enabled)
   1591		amdgpu_dpm_enable_uvd(adev, false);
   1592
   1593	return 0;
   1594}
   1595
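        /**
         * vcn_v3_0_pause_dpg_mode - pause/unpause the DPG state machine
         *
         * @adev: amdgpu_device pointer
         * @inst_idx: instance to pause or unpause
         * @new_state: requested pause state
         *
         * On a pause request the encode rings and the saved decode wptr/rptr
         * are re-programmed from FW shared memory (skipped on VCN 3.0.33);
         * unpausing only clears the pause request bit.
         */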
   1596static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
   1597		   int inst_idx, struct dpg_pause_state *new_state)
   1598{
   1599	volatile struct amdgpu_fw_shared *fw_shared;
   1600	struct amdgpu_ring *ring;
   1601	uint32_t reg_data = 0;
   1602	int ret_code;
   1603
   1604	/* pause/unpause if state is changed */
   1605	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
   1606		DRM_DEBUG("dpg pause state changed %d -> %d",
    1607			adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
   1608		reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
   1609			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
   1610
   1611		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
   1612			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
   1613				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
   1614
   1615			if (!ret_code) {
   1616				/* pause DPG */
   1617				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
   1618				WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
   1619
   1620				/* wait for ACK */
   1621				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
   1622					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
   1623					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
   1624
   1625				/* Stall DPG before WPTR/RPTR reset */
   1626				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
   1627					UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
   1628					~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
   1629
   1630				if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
   1631					/* Restore */
   1632					fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
   1633					fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
   1634					ring = &adev->vcn.inst[inst_idx].ring_enc[0];
   1635					ring->wptr = 0;
   1636					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
   1637					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
   1638					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
   1639					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
   1640					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
   1641					fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
   1642
   1643					fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
   1644					ring = &adev->vcn.inst[inst_idx].ring_enc[1];
   1645					ring->wptr = 0;
   1646					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
   1647					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
   1648					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
   1649					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
   1650					WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
   1651					fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
   1652
    1653					/* restore wptr/rptr with pointers saved in FW shared memory */
   1654					WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr);
   1655					WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr);
   1656				}
   1657
   1658				/* Unstall DPG */
   1659				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
   1660					0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
   1661
   1662				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
   1663					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
   1664			}
   1665		} else {
   1666			/* unpause dpg, no need to wait */
   1667			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
   1668			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
   1669		}
   1670		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
   1671	}
   1672
   1673	return 0;
   1674}
   1675
   1676/**
   1677 * vcn_v3_0_dec_ring_get_rptr - get read pointer
   1678 *
   1679 * @ring: amdgpu_ring pointer
   1680 *
   1681 * Returns the current hardware read pointer
   1682 */
   1683static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
   1684{
   1685	struct amdgpu_device *adev = ring->adev;
   1686
   1687	return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
   1688}
   1689
   1690/**
   1691 * vcn_v3_0_dec_ring_get_wptr - get write pointer
   1692 *
   1693 * @ring: amdgpu_ring pointer
   1694 *
   1695 * Returns the current hardware write pointer
   1696 */
   1697static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
   1698{
   1699	struct amdgpu_device *adev = ring->adev;
   1700
   1701	if (ring->use_doorbell)
   1702		return *ring->wptr_cpu_addr;
   1703	else
   1704		return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
   1705}
   1706
   1707/**
   1708 * vcn_v3_0_dec_ring_set_wptr - set write pointer
   1709 *
   1710 * @ring: amdgpu_ring pointer
   1711 *
   1712 * Commits the write pointer to the hardware
   1713 */
   1714static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
   1715{
   1716	struct amdgpu_device *adev = ring->adev;
   1717	volatile struct amdgpu_fw_shared *fw_shared;
   1718
   1719	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
    1720		/* whenever we update RBC_RB_WPTR, save the wptr in shared rb.wptr and scratch2 */
   1721		fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr;
   1722		fw_shared->rb.wptr = lower_32_bits(ring->wptr);
   1723		WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
   1724			lower_32_bits(ring->wptr));
   1725	}
   1726
   1727	if (ring->use_doorbell) {
   1728		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
   1729		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
   1730	} else {
   1731		WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
   1732	}
   1733}
   1734
   1735static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
   1736	.type = AMDGPU_RING_TYPE_VCN_DEC,
   1737	.align_mask = 0x3f,
   1738	.nop = VCN_DEC_SW_CMD_NO_OP,
   1739	.secure_submission_supported = true,
   1740	.vmhub = AMDGPU_MMHUB_0,
   1741	.get_rptr = vcn_v3_0_dec_ring_get_rptr,
   1742	.get_wptr = vcn_v3_0_dec_ring_get_wptr,
   1743	.set_wptr = vcn_v3_0_dec_ring_set_wptr,
   1744	.emit_frame_size =
   1745		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
   1746		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
   1747		VCN_SW_RING_EMIT_FRAME_SIZE,
   1748	.emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
   1749	.emit_ib = vcn_dec_sw_ring_emit_ib,
   1750	.emit_fence = vcn_dec_sw_ring_emit_fence,
   1751	.emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
   1752	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
    1753	.test_ib = NULL, /* amdgpu_vcn_dec_sw_ring_test_ib */
   1754	.insert_nop = amdgpu_ring_insert_nop,
   1755	.insert_end = vcn_dec_sw_ring_insert_end,
   1756	.pad_ib = amdgpu_ring_generic_pad_ib,
   1757	.begin_use = amdgpu_vcn_ring_begin_use,
   1758	.end_use = amdgpu_vcn_ring_end_use,
   1759	.emit_wreg = vcn_dec_sw_ring_emit_wreg,
   1760	.emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
   1761	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
   1762};
   1763
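        /*
         * Restrict the entity to a single decode scheduler so that sessions
         * which cannot run on every instance stay on one VCN.
         */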
   1764static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
   1765{
   1766	struct drm_gpu_scheduler **scheds;
   1767
   1768	/* The create msg must be in the first IB submitted */
   1769	if (atomic_read(&p->entity->fence_seq))
   1770		return -EINVAL;
   1771
   1772	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
   1773		[AMDGPU_RING_PRIO_DEFAULT].sched;
   1774	drm_sched_entity_modify_sched(p->entity, scheds, 1);
   1775	return 0;
   1776}
   1777
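        /*
         * Map the decode message at @addr and, for create messages, check the
         * requested codec; sessions using codecs that cannot run on every
         * instance are pinned to a single scheduler.
         */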
   1778static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
   1779{
   1780	struct ttm_operation_ctx ctx = { false, false };
   1781	struct amdgpu_bo_va_mapping *map;
   1782	uint32_t *msg, num_buffers;
   1783	struct amdgpu_bo *bo;
   1784	uint64_t start, end;
   1785	unsigned int i;
    1786	void *ptr;
   1787	int r;
   1788
   1789	addr &= AMDGPU_GMC_HOLE_MASK;
   1790	r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
   1791	if (r) {
   1792		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
   1793		return r;
   1794	}
   1795
   1796	start = map->start * AMDGPU_GPU_PAGE_SIZE;
   1797	end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
   1798	if (addr & 0x7) {
   1799		DRM_ERROR("VCN messages must be 8 byte aligned!\n");
   1800		return -EINVAL;
   1801	}
   1802
   1803	bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   1804	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
   1805	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
   1806	if (r) {
   1807		DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
   1808		return r;
   1809	}
   1810
   1811	r = amdgpu_bo_kmap(bo, &ptr);
   1812	if (r) {
   1813		DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
   1814		return r;
   1815	}
   1816
   1817	msg = ptr + addr - start;
   1818
   1819	/* Check length */
   1820	if (msg[1] > end - addr) {
   1821		r = -EINVAL;
   1822		goto out;
   1823	}
   1824
   1825	if (msg[3] != RDECODE_MSG_CREATE)
   1826		goto out;
   1827
   1828	num_buffers = msg[2];
   1829	for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
   1830		uint32_t offset, size, *create;
   1831
   1832		if (msg[0] != RDECODE_MESSAGE_CREATE)
   1833			continue;
   1834
   1835		offset = msg[1];
   1836		size = msg[2];
   1837
   1838		if (offset + size > end) {
   1839			r = -EINVAL;
   1840			goto out;
   1841		}
   1842
   1843		create = ptr + addr + offset - start;
   1844
    1845		/* H264, HEVC and VP9 can run on any instance */
   1846		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
   1847			continue;
   1848
   1849		r = vcn_v3_0_limit_sched(p);
   1850		if (r)
   1851			goto out;
   1852	}
   1853
   1854out:
   1855	amdgpu_bo_kunmap(bo);
   1856	return r;
   1857}
   1858
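        /*
         * Scan the IB for decode messages when running on a secondary
         * instance; the first instance can decode anything and is left
         * untouched.
         */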
   1859static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
   1860					   struct amdgpu_job *job,
   1861					   struct amdgpu_ib *ib)
   1862{
   1863	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
   1864	uint32_t msg_lo = 0, msg_hi = 0;
    1865	unsigned int i;
   1866	int r;
   1867
   1868	/* The first instance can decode anything */
   1869	if (!ring->me)
   1870		return 0;
   1871
   1872	for (i = 0; i < ib->length_dw; i += 2) {
   1873		uint32_t reg = amdgpu_ib_get_value(ib, i);
   1874		uint32_t val = amdgpu_ib_get_value(ib, i + 1);
   1875
   1876		if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
   1877			msg_lo = val;
   1878		} else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
   1879			msg_hi = val;
   1880		} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
   1881			   val == 0) {
   1882			r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
   1883			if (r)
   1884				return r;
   1885		}
   1886	}
   1887	return 0;
   1888}
   1889
   1890static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
   1891	.type = AMDGPU_RING_TYPE_VCN_DEC,
   1892	.align_mask = 0xf,
   1893	.secure_submission_supported = true,
   1894	.vmhub = AMDGPU_MMHUB_0,
   1895	.get_rptr = vcn_v3_0_dec_ring_get_rptr,
   1896	.get_wptr = vcn_v3_0_dec_ring_get_wptr,
   1897	.set_wptr = vcn_v3_0_dec_ring_set_wptr,
   1898	.patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
   1899	.emit_frame_size =
   1900		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
   1901		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
   1902		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
   1903		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
   1904		6,
   1905	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
   1906	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
   1907	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
   1908	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
   1909	.test_ring = vcn_v2_0_dec_ring_test_ring,
   1910	.test_ib = amdgpu_vcn_dec_ring_test_ib,
   1911	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
   1912	.insert_start = vcn_v2_0_dec_ring_insert_start,
   1913	.insert_end = vcn_v2_0_dec_ring_insert_end,
   1914	.pad_ib = amdgpu_ring_generic_pad_ib,
   1915	.begin_use = amdgpu_vcn_ring_begin_use,
   1916	.end_use = amdgpu_vcn_ring_end_use,
   1917	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
   1918	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
   1919	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
   1920};
   1921
   1922/**
   1923 * vcn_v3_0_enc_ring_get_rptr - get enc read pointer
   1924 *
   1925 * @ring: amdgpu_ring pointer
   1926 *
   1927 * Returns the current hardware enc read pointer
   1928 */
   1929static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
   1930{
   1931	struct amdgpu_device *adev = ring->adev;
   1932
   1933	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
   1934		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
   1935	else
   1936		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
   1937}
   1938
   1939/**
   1940 * vcn_v3_0_enc_ring_get_wptr - get enc write pointer
   1941 *
   1942 * @ring: amdgpu_ring pointer
   1943 *
   1944 * Returns the current hardware enc write pointer
   1945 */
   1946static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
   1947{
   1948	struct amdgpu_device *adev = ring->adev;
   1949
   1950	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
   1951		if (ring->use_doorbell)
   1952			return *ring->wptr_cpu_addr;
   1953		else
   1954			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
   1955	} else {
   1956		if (ring->use_doorbell)
   1957			return *ring->wptr_cpu_addr;
   1958		else
   1959			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
   1960	}
   1961}
   1962
   1963/**
   1964 * vcn_v3_0_enc_ring_set_wptr - set enc write pointer
   1965 *
   1966 * @ring: amdgpu_ring pointer
   1967 *
   1968 * Commits the enc write pointer to the hardware
   1969 */
   1970static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
   1971{
   1972	struct amdgpu_device *adev = ring->adev;
   1973
   1974	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
   1975		if (ring->use_doorbell) {
   1976			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
   1977			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
   1978		} else {
   1979			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
   1980		}
   1981	} else {
   1982		if (ring->use_doorbell) {
   1983			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
   1984			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
   1985		} else {
   1986			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
   1987		}
   1988	}
   1989}
   1990
   1991static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
   1992	.type = AMDGPU_RING_TYPE_VCN_ENC,
   1993	.align_mask = 0x3f,
   1994	.nop = VCN_ENC_CMD_NO_OP,
   1995	.vmhub = AMDGPU_MMHUB_0,
   1996	.get_rptr = vcn_v3_0_enc_ring_get_rptr,
   1997	.get_wptr = vcn_v3_0_enc_ring_get_wptr,
   1998	.set_wptr = vcn_v3_0_enc_ring_set_wptr,
   1999	.emit_frame_size =
   2000		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
   2001		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
   2002		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
   2003		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
   2004		1, /* vcn_v2_0_enc_ring_insert_end */
   2005	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
   2006	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
   2007	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
   2008	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
   2009	.test_ring = amdgpu_vcn_enc_ring_test_ring,
   2010	.test_ib = amdgpu_vcn_enc_ring_test_ib,
   2011	.insert_nop = amdgpu_ring_insert_nop,
   2012	.insert_end = vcn_v2_0_enc_ring_insert_end,
   2013	.pad_ib = amdgpu_ring_generic_pad_ib,
   2014	.begin_use = amdgpu_vcn_ring_begin_use,
   2015	.end_use = amdgpu_vcn_ring_end_use,
   2016	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
   2017	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
   2018	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
   2019};
   2020
   2021static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
   2022{
   2023	int i;
   2024
   2025	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   2026		if (adev->vcn.harvest_config & (1 << i))
   2027			continue;
   2028
   2029		if (!DEC_SW_RING_ENABLED)
   2030			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
   2031		else
   2032			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
   2033		adev->vcn.inst[i].ring_dec.me = i;
   2034		DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
    2035			  DEC_SW_RING_ENABLED ? "(Software Ring)" : "");
   2036	}
   2037}
   2038
   2039static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
   2040{
   2041	int i, j;
   2042
   2043	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   2044		if (adev->vcn.harvest_config & (1 << i))
   2045			continue;
   2046
   2047		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
   2048			adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
   2049			adev->vcn.inst[i].ring_enc[j].me = i;
   2050		}
   2051		if (adev->vcn.num_enc_rings > 0)
   2052			DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
   2053	}
   2054}
   2055
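        /* Report idle only if every unharvested instance reads UVD_STATUS__IDLE. */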
   2056static bool vcn_v3_0_is_idle(void *handle)
   2057{
   2058	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   2059	int i, ret = 1;
   2060
   2061	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   2062		if (adev->vcn.harvest_config & (1 << i))
   2063			continue;
   2064
   2065		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
   2066	}
   2067
   2068	return ret;
   2069}
   2070
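        /* Poll each unharvested instance until UVD_STATUS reports idle. */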
   2071static int vcn_v3_0_wait_for_idle(void *handle)
   2072{
   2073	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   2074	int i, ret = 0;
   2075
   2076	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   2077		if (adev->vcn.harvest_config & (1 << i))
   2078			continue;
   2079
   2080		ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
   2081			UVD_STATUS__IDLE);
   2082		if (ret)
   2083			return ret;
   2084	}
   2085
   2086	return ret;
   2087}
   2088
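        /*
         * Enable or disable VCN clock gating; gating is refused with -EBUSY
         * while an instance is still busy.
         */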
   2089static int vcn_v3_0_set_clockgating_state(void *handle,
   2090					  enum amd_clockgating_state state)
   2091{
   2092	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    2093	bool enable = (state == AMD_CG_STATE_GATE);
   2094	int i;
   2095
   2096	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   2097		if (adev->vcn.harvest_config & (1 << i))
   2098			continue;
   2099
   2100		if (enable) {
   2101			if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
   2102				return -EBUSY;
   2103			vcn_v3_0_enable_clock_gating(adev, i);
   2104		} else {
   2105			vcn_v3_0_disable_clock_gating(adev, i);
   2106		}
   2107	}
   2108
   2109	return 0;
   2110}
   2111
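        /*
         * Gate or ungate VCN power by stopping or starting the block; under
         * SRIOV the MMSCH firmware owns power- and clock-gating, so the guest
         * leaves the state ungated.
         */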
   2112static int vcn_v3_0_set_powergating_state(void *handle,
   2113					  enum amd_powergating_state state)
   2114{
   2115	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   2116	int ret;
   2117
    2118	/* for SRIOV, the guest should not control VCN power-gating;
    2119	 * MMSCH FW controls both power-gating and clock-gating, so the
    2120	 * guest should avoid touching CGC and PG
    2121	 */
   2122	if (amdgpu_sriov_vf(adev)) {
   2123		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
   2124		return 0;
   2125	}
   2126
    2127	if (state == adev->vcn.cur_state)
   2128		return 0;
   2129
   2130	if (state == AMD_PG_STATE_GATE)
   2131		ret = vcn_v3_0_stop(adev);
   2132	else
   2133		ret = vcn_v3_0_start(adev);
   2134
    2135	if (!ret)
   2136		adev->vcn.cur_state = state;
   2137
   2138	return ret;
   2139}
   2140
   2141static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev,
   2142					struct amdgpu_irq_src *source,
   2143					unsigned type,
   2144					enum amdgpu_interrupt_state state)
   2145{
   2146	return 0;
   2147}
   2148
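        /*
         * Route a VCN interrupt to the fence handler of the decode or encode
         * ring of the instance that raised it.
         */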
   2149static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev,
   2150				      struct amdgpu_irq_src *source,
   2151				      struct amdgpu_iv_entry *entry)
   2152{
   2153	uint32_t ip_instance;
   2154
   2155	switch (entry->client_id) {
   2156	case SOC15_IH_CLIENTID_VCN:
   2157		ip_instance = 0;
   2158		break;
   2159	case SOC15_IH_CLIENTID_VCN1:
   2160		ip_instance = 1;
   2161		break;
   2162	default:
   2163		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
   2164		return 0;
   2165	}
   2166
   2167	DRM_DEBUG("IH: VCN TRAP\n");
   2168
   2169	switch (entry->src_id) {
   2170	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
   2171		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
   2172		break;
   2173	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
   2174		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
   2175		break;
   2176	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
   2177		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
   2178		break;
   2179	default:
   2180		DRM_ERROR("Unhandled interrupt: %d %d\n",
   2181			  entry->src_id, entry->src_data[0]);
   2182		break;
   2183	}
   2184
   2185	return 0;
   2186}
   2187
   2188static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = {
   2189	.set = vcn_v3_0_set_interrupt_state,
   2190	.process = vcn_v3_0_process_interrupt,
   2191};
   2192
   2193static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
   2194{
   2195	int i;
   2196
   2197	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
   2198		if (adev->vcn.harvest_config & (1 << i))
   2199			continue;
   2200
   2201		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
   2202		adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
   2203	}
   2204}
   2205
   2206static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
   2207	.name = "vcn_v3_0",
   2208	.early_init = vcn_v3_0_early_init,
   2209	.late_init = NULL,
   2210	.sw_init = vcn_v3_0_sw_init,
   2211	.sw_fini = vcn_v3_0_sw_fini,
   2212	.hw_init = vcn_v3_0_hw_init,
   2213	.hw_fini = vcn_v3_0_hw_fini,
   2214	.suspend = vcn_v3_0_suspend,
   2215	.resume = vcn_v3_0_resume,
   2216	.is_idle = vcn_v3_0_is_idle,
   2217	.wait_for_idle = vcn_v3_0_wait_for_idle,
   2218	.check_soft_reset = NULL,
   2219	.pre_soft_reset = NULL,
   2220	.soft_reset = NULL,
   2221	.post_soft_reset = NULL,
   2222	.set_clockgating_state = vcn_v3_0_set_clockgating_state,
   2223	.set_powergating_state = vcn_v3_0_set_powergating_state,
   2224};
   2225
   2226const struct amdgpu_ip_block_version vcn_v3_0_ip_block =
   2227{
   2228	.type = AMD_IP_BLOCK_TYPE_VCN,
   2229	.major = 3,
   2230	.minor = 0,
   2231	.rev = 0,
   2232	.funcs = &vcn_v3_0_ip_funcs,
   2233};