cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vce_v3_0.c (28338B)


/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "ivsrcid/ivsrcid_vislands30.h"


#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

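/* Select a single VCE instance (and all of its pipes) via GRBM_GFX_INDEX */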
#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);
/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_RPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_RPTR2);
	else
		v = RREG32(mmVCE_RB_RPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 v;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		v = RREG32(mmVCE_RB_WPTR);
	else if (ring->me == 1)
		v = RREG32(mmVCE_RB_WPTR2);
	else
		v = RREG32(mmVCE_RB_WPTR3);

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return v;
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	mutex_lock(&adev->grbm_idx_mutex);
	if (adev->vce.harvest_config == 0 ||
		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));

	if (ring->me == 0)
		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	else
		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

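/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to report in
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the firmware reports that it has loaded,
 * soft-resetting the ECPU between attempts.
 * Returns 0 on success, -ETIMEDOUT if the firmware never comes up.
 */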
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		/* Program the instance 0 register space when instance 0 is
		 * present (both instances, or only instance 0); program the
		 * instance 1 register space when only instance 1 is available.
		 */
		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
			ring = &adev->vce.ring[0];
			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

			ring = &adev->vce.ring[1];
			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

			ring = &adev->vce.ring[2];
			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
		}

		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

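/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear
 * VCE_STATUS on every non-harvested instance.
 */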
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold on ECPU */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear VCE STATUS */
		WREG32(mmVCE_STATUS, 0);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

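/**
 * vce_v3_0_get_harvest_config - read the VCE harvest fuses
 *
 * @adev: amdgpu_device pointer
 *
 * Returns a mask of AMDGPU_VCE_HARVEST_VCE0/VCE1 bits describing which
 * VCE instances are fused off (or otherwise unusable) on this ASIC.
 */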
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY))
		return AMDGPU_VCE_HARVEST_VCE1;

	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		if ((adev->asic_type == CHIP_POLARIS10) ||
		    (adev->asic_type == CHIP_POLARIS11) ||
		    (adev->asic_type == CHIP_POLARIS12) ||
		    (adev->asic_type == CHIP_VEGAM))
			return AMDGPU_VCE_HARVEST_VCE1;

		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     hw_prio, NULL);
		if (r)
			return r;
	}

	r = amdgpu_vce_entity_init(adev);

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	vce_v3_0_override_vce_clock_gating(adev, true);

	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	vce_v3_0_stop(adev);
	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/*
	 * Proper cleanups before halting the HW engine:
	 *   - cancel the delayed idle work
	 *   - enable powergating
	 *   - enable clockgating
	 *   - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v3_0_hw_init(adev);
}

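/**
 * vce_v3_0_mc_resume - program the VCPU memory controller registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance to program
 *
 * Set up clock gating overrides and the LMI/VCPU cache registers so the
 * selected instance can fetch its firmware, stack and data segments.
 */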
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);

	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

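/**
 * vce_v3_0_is_idle - check VCE idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true if neither non-harvested VCE instance is reported busy
 * in SRBM_STATUS2.
 */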
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3-6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}


static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

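/**
 * vce_v3_0_set_interrupt_state - toggle the VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Enables or disables the VCE system interrupt trap in VCE_SYS_INT_EN.
 */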
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

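/**
 * vce_v3_0_process_interrupt - handle a VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledges the trap and signals fence completion on the ring
 * identified by the source data.
 */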
static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (!enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret = 0;

	if (state == AMD_PG_STATE_GATE) {
		ret = vce_v3_0_stop(adev);
		if (ret)
			goto out;
	} else {
		ret = vce_v3_0_start(adev);
		if (ret)
			goto out;
	}

out:
	return ret;
}

static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	mutex_lock(&adev->pm.mutex);

	if (adev->flags & AMD_IS_APU)
		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
	else
		data = RREG32_SMC(ixCURRENT_PG_STATUS);

	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
		goto out;
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);

	/* AMD_CG_SUPPORT_VCE_MGCG */
	data = RREG32(mmVCE_CLOCK_GATING_A);
	if (data & (0x04 << 4))
		*flags |= AMD_CG_SUPPORT_VCE_MGCG;

out:
	mutex_unlock(&adev->pm.mutex);
}

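/**
 * vce_v3_0_ring_emit_ib - emit an indirect buffer on the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @job: job that owns the indirect buffer
 * @ib: indirect buffer to schedule
 * @flags: unused
 *
 * Writes an IB_VM command with the VM id and the IB address and size.
 */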
static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

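/**
 * vce_v3_0_emit_vm_flush - flush the VM TLB via the VCE ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM id to flush
 * @pd_addr: page directory address
 *
 * Emits an UPDATE_PTB command with the new page directory base followed
 * by a FLUSH_TLB command for the given VM id.
 */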
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

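/**
 * vce_v3_0_emit_pipeline_sync - wait for the previously emitted fence
 *
 * @ring: amdgpu_ring pointer
 *
 * Emits a WAIT_GE command on the ring's fence address so the engine
 * stalls until the last synced sequence number has signalled.
 */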
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
	.get_clockgating_state = vce_v3_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++) {
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
			adev->vce.ring[i].me = i;
		}
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};