cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sdma_v2_4.c (36483B)


      1/*
      2 * Copyright 2014 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 * Authors: Alex Deucher
     23 */
     24
     25#include <linux/delay.h>
     26#include <linux/firmware.h>
     27#include <linux/module.h>
     28
     29#include "amdgpu.h"
     30#include "amdgpu_ucode.h"
     31#include "amdgpu_trace.h"
     32#include "vi.h"
     33#include "vid.h"
     34
     35#include "oss/oss_2_4_d.h"
     36#include "oss/oss_2_4_sh_mask.h"
     37
     38#include "gmc/gmc_7_1_d.h"
     39#include "gmc/gmc_7_1_sh_mask.h"
     40
     41#include "gca/gfx_8_0_d.h"
     42#include "gca/gfx_8_0_enum.h"
     43#include "gca/gfx_8_0_sh_mask.h"
     44
     45#include "bif/bif_5_0_d.h"
     46#include "bif/bif_5_0_sh_mask.h"
     47
     48#include "iceland_sdma_pkt_open.h"
     49
     50#include "ivsrcid/ivsrcid_vislands30.h"
     51
     52static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
     53static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
     54static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
     55static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
     56
     57MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
     58MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");
     59
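/* per-instance MMIO offsets, added to the mmSDMA0_* register addresses to reach SDMA0/SDMA1 */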
     60static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
     61{
     62	SDMA0_REGISTER_OFFSET,
     63	SDMA1_REGISTER_OFFSET
     64};
     65
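/* register/mask/value triplets applied via amdgpu_device_program_register_sequence() below */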
     66static const u32 golden_settings_iceland_a11[] =
     67{
     68	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
     69	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
     70	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
     71	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
     72};
     73
     74static const u32 iceland_mgcg_cgcg_init[] =
     75{
     76	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
     77	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
     78};
     79
     80/*
     81 * sDMA - System DMA
     82 * Starting with CIK, the GPU has new asynchronous
     83 * DMA engines.  These engines are used for compute
     84 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
     85 * and each one supports 1 ring buffer used for gfx
     86 * and 2 queues used for compute.
     87 *
     88 * The programming model is very similar to the CP
      90 * (ring buffer, IBs, etc.), but sDMA has its own
     90 * packet format that is different from the PM4 format
     91 * used by the CP. sDMA supports copying data, writing
     92 * embedded data, solid fills, and a number of other
     93 * things.  It also has support for tiling/detiling of
     94 * buffers.
     95 */
     96
     97static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
     98{
     99	switch (adev->asic_type) {
    100	case CHIP_TOPAZ:
    101		amdgpu_device_program_register_sequence(adev,
    102							iceland_mgcg_cgcg_init,
    103							ARRAY_SIZE(iceland_mgcg_cgcg_init));
    104		amdgpu_device_program_register_sequence(adev,
    105							golden_settings_iceland_a11,
    106							ARRAY_SIZE(golden_settings_iceland_a11));
    107		break;
    108	default:
    109		break;
    110	}
    111}
    112
    113static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
    114{
    115	int i;
    116	for (i = 0; i < adev->sdma.num_instances; i++) {
    117		release_firmware(adev->sdma.instance[i].fw);
    118		adev->sdma.instance[i].fw = NULL;
    119	}
    120}
    121
    122/**
    123 * sdma_v2_4_init_microcode - load ucode images from disk
    124 *
    125 * @adev: amdgpu_device pointer
    126 *
    127 * Use the firmware interface to load the ucode images into
    128 * the driver (not loaded into hw).
    129 * Returns 0 on success, error on failure.
    130 */
    131static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
    132{
    133	const char *chip_name;
    134	char fw_name[30];
    135	int err = 0, i;
    136	struct amdgpu_firmware_info *info = NULL;
    137	const struct common_firmware_header *header = NULL;
    138	const struct sdma_firmware_header_v1_0 *hdr;
    139
    140	DRM_DEBUG("\n");
    141
    142	switch (adev->asic_type) {
    143	case CHIP_TOPAZ:
    144		chip_name = "topaz";
    145		break;
    146	default: BUG();
    147	}
    148
    149	for (i = 0; i < adev->sdma.num_instances; i++) {
    150		if (i == 0)
    151			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
    152		else
    153			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
    154		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
    155		if (err)
    156			goto out;
    157		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
    158		if (err)
    159			goto out;
    160		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
    161		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
    162		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
    163		if (adev->sdma.instance[i].feature_version >= 20)
    164			adev->sdma.instance[i].burst_nop = true;
    165
    166		if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
    167			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
    168			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
    169			info->fw = adev->sdma.instance[i].fw;
    170			header = (const struct common_firmware_header *)info->fw->data;
    171			adev->firmware.fw_size +=
    172				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
    173		}
    174	}
    175
    176out:
    177	if (err) {
    178		pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
    179		for (i = 0; i < adev->sdma.num_instances; i++) {
    180			release_firmware(adev->sdma.instance[i].fw);
    181			adev->sdma.instance[i].fw = NULL;
    182		}
    183	}
    184	return err;
    185}
    186
    187/**
    188 * sdma_v2_4_ring_get_rptr - get the current read pointer
    189 *
    190 * @ring: amdgpu ring pointer
    191 *
    192 * Get the current rptr from the hardware (VI+).
    193 */
    194static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
    195{
    196	/* XXX check if swapping is necessary on BE */
    197	return *ring->rptr_cpu_addr >> 2;
    198}
    199
    200/**
    201 * sdma_v2_4_ring_get_wptr - get the current write pointer
    202 *
    203 * @ring: amdgpu ring pointer
    204 *
    205 * Get the current wptr from the hardware (VI+).
    206 */
    207static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
    208{
    209	struct amdgpu_device *adev = ring->adev;
    210	u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
    211
    212	return wptr;
    213}
    214
    215/**
    216 * sdma_v2_4_ring_set_wptr - commit the write pointer
    217 *
    218 * @ring: amdgpu ring pointer
    219 *
    220 * Write the wptr back to the hardware (VI+).
    221 */
    222static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
    223{
    224	struct amdgpu_device *adev = ring->adev;
    225
    226	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
    227}
    228
    229static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
    230{
    231	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
    232	int i;
    233
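	/* with burst NOP support the first dword is a NOP header declaring count - 1
	 * trailing dwords; the remaining iterations simply fill out that packet */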
    234	for (i = 0; i < count; i++)
    235		if (sdma && sdma->burst_nop && (i == 0))
    236			amdgpu_ring_write(ring, ring->funcs->nop |
    237				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
    238		else
    239			amdgpu_ring_write(ring, ring->funcs->nop);
    240}
    241
    242/**
    243 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
    244 *
    245 * @ring: amdgpu ring pointer
    246 * @job: job to retrieve vmid from
    247 * @ib: IB object to schedule
    248 * @flags: unused
    249 *
    250 * Schedule an IB in the DMA ring (VI).
    251 */
    252static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
    253				   struct amdgpu_job *job,
    254				   struct amdgpu_ib *ib,
    255				   uint32_t flags)
    256{
    257	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
    258
     259	/* IB packet must end on an 8 DW boundary */
    260	sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
    261
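	/* the INDIRECT_BUFFER packet below is 6 dwords, so padding to (wptr & 7) == 2
	 * keeps IB packets ending on an 8 DW boundary */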
    262	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
    263			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
    264	/* base must be 32 byte aligned */
    265	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
    266	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    267	amdgpu_ring_write(ring, ib->length_dw);
    268	amdgpu_ring_write(ring, 0);
    269	amdgpu_ring_write(ring, 0);
    270
    271}
    272
    273/**
    274 * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
    275 *
    276 * @ring: amdgpu ring pointer
    277 *
    278 * Emit an hdp flush packet on the requested DMA ring.
    279 */
    280static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
    281{
    282	u32 ref_and_mask = 0;
    283
    284	if (ring->me == 0)
    285		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
    286	else
    287		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
    288
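	/* POLL_REGMEM with HDP_FLUSH set: write GPU_HDP_FLUSH_REQ and poll
	 * GPU_HDP_FLUSH_DONE until this engine's bit reads back as set */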
    289	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
    290			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
    291			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
    292	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
    293	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
    294	amdgpu_ring_write(ring, ref_and_mask); /* reference */
    295	amdgpu_ring_write(ring, ref_and_mask); /* mask */
    296	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
    297			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
    298}
    299
    300/**
    301 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
    302 *
    303 * @ring: amdgpu ring pointer
     304 * @addr: GPU address to write the fence sequence number to
    305 * @seq: sequence number
    306 * @flags: fence related flags
    307 *
    308 * Add a DMA fence packet to the ring to write
    309 * the fence seq number and DMA trap packet to generate
    310 * an interrupt if needed (VI).
    311 */
    312static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
    313				      unsigned flags)
    314{
    315	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
    316	/* write the fence */
    317	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
    318	amdgpu_ring_write(ring, lower_32_bits(addr));
    319	amdgpu_ring_write(ring, upper_32_bits(addr));
    320	amdgpu_ring_write(ring, lower_32_bits(seq));
    321
    322	/* optionally write high bits as well */
    323	if (write64bit) {
    324		addr += 4;
    325		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
    326		amdgpu_ring_write(ring, lower_32_bits(addr));
    327		amdgpu_ring_write(ring, upper_32_bits(addr));
    328		amdgpu_ring_write(ring, upper_32_bits(seq));
    329	}
    330
    331	/* generate an interrupt */
    332	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
    333	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
    334}
    335
    336/**
    337 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
    338 *
    339 * @adev: amdgpu_device pointer
    340 *
    341 * Stop the gfx async dma ring buffers (VI).
    342 */
    343static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
    344{
    345	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
    346	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
    347	u32 rb_cntl, ib_cntl;
    348	int i;
    349
    350	if ((adev->mman.buffer_funcs_ring == sdma0) ||
    351	    (adev->mman.buffer_funcs_ring == sdma1))
    352		amdgpu_ttm_set_buffer_funcs_status(adev, false);
    353
    354	for (i = 0; i < adev->sdma.num_instances; i++) {
    355		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
    356		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
    357		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
    358		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
    359		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
    360		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
    361	}
    362}
    363
    364/**
    365 * sdma_v2_4_rlc_stop - stop the compute async dma engines
    366 *
    367 * @adev: amdgpu_device pointer
    368 *
    369 * Stop the compute async dma queues (VI).
    370 */
    371static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev)
    372{
    373	/* XXX todo */
    374}
    375
    376/**
    377 * sdma_v2_4_enable - stop the async dma engines
    378 *
    379 * @adev: amdgpu_device pointer
    380 * @enable: enable/disable the DMA MEs.
    381 *
    382 * Halt or unhalt the async dma engines (VI).
    383 */
    384static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
    385{
    386	u32 f32_cntl;
    387	int i;
    388
    389	if (!enable) {
    390		sdma_v2_4_gfx_stop(adev);
    391		sdma_v2_4_rlc_stop(adev);
    392	}
    393
    394	for (i = 0; i < adev->sdma.num_instances; i++) {
    395		f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
    396		if (enable)
    397			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
    398		else
    399			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
    400		WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
    401	}
    402}
    403
    404/**
    405 * sdma_v2_4_gfx_resume - setup and start the async dma engines
    406 *
    407 * @adev: amdgpu_device pointer
    408 *
    409 * Set up the gfx DMA ring buffers and enable them (VI).
    410 * Returns 0 for success, error for failure.
    411 */
    412static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
    413{
    414	struct amdgpu_ring *ring;
    415	u32 rb_cntl, ib_cntl;
    416	u32 rb_bufsz;
    417	int i, j, r;
    418
    419	for (i = 0; i < adev->sdma.num_instances; i++) {
    420		ring = &adev->sdma.instance[i].ring;
    421
    422		mutex_lock(&adev->srbm_mutex);
    423		for (j = 0; j < 16; j++) {
    424			vi_srbm_select(adev, 0, 0, 0, j);
    425			/* SDMA GFX */
    426			WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
    427			WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
    428		}
    429		vi_srbm_select(adev, 0, 0, 0, 0);
    430		mutex_unlock(&adev->srbm_mutex);
    431
    432		WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
    433		       adev->gfx.config.gb_addr_config & 0x70);
    434
    435		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
    436
    437		/* Set ring buffer size in dwords */
    438		rb_bufsz = order_base_2(ring->ring_size / 4);
    439		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
    440		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
    441#ifdef __BIG_ENDIAN
    442		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
    443		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
    444					RPTR_WRITEBACK_SWAP_ENABLE, 1);
    445#endif
    446		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
    447
    448		/* Initialize the ring buffer's read and write pointers */
    449		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
    450		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
    451		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
    452		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
    453
    454		/* set the wb address whether it's enabled or not */
    455		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
    456		       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
    457		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
    458		       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
    459
    460		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
    461
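		/* the ring base address is programmed in 256-byte units */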
    462		WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
    463		WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
    464
    465		ring->wptr = 0;
    466		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
    467
    468		/* enable DMA RB */
    469		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
    470		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
    471
    472		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
    473		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
    474#ifdef __BIG_ENDIAN
    475		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
    476#endif
    477		/* enable DMA IBs */
    478		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
    479
    480		ring->sched.ready = true;
    481	}
    482
    483	sdma_v2_4_enable(adev, true);
    484	for (i = 0; i < adev->sdma.num_instances; i++) {
    485		ring = &adev->sdma.instance[i].ring;
    486		r = amdgpu_ring_test_helper(ring);
    487		if (r)
    488			return r;
    489
    490		if (adev->mman.buffer_funcs_ring == ring)
    491			amdgpu_ttm_set_buffer_funcs_status(adev, true);
    492	}
    493
    494	return 0;
    495}
    496
    497/**
    498 * sdma_v2_4_rlc_resume - setup and start the async dma engines
    499 *
    500 * @adev: amdgpu_device pointer
    501 *
    502 * Set up the compute DMA queues and enable them (VI).
    503 * Returns 0 for success, error for failure.
    504 */
    505static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
    506{
    507	/* XXX todo */
    508	return 0;
    509}
    510
    511
    512/**
    513 * sdma_v2_4_start - setup and start the async dma engines
    514 *
    515 * @adev: amdgpu_device pointer
    516 *
    517 * Set up the DMA engines and enable them (VI).
    518 * Returns 0 for success, error for failure.
    519 */
    520static int sdma_v2_4_start(struct amdgpu_device *adev)
    521{
    522	int r;
    523
     524	/* halt the engine before programming */
    525	sdma_v2_4_enable(adev, false);
    526
    527	/* start the gfx rings and rlc compute queues */
    528	r = sdma_v2_4_gfx_resume(adev);
    529	if (r)
    530		return r;
    531	r = sdma_v2_4_rlc_resume(adev);
    532	if (r)
    533		return r;
    534
    535	return 0;
    536}
    537
    538/**
    539 * sdma_v2_4_ring_test_ring - simple async dma engine test
    540 *
    541 * @ring: amdgpu_ring structure holding ring information
    542 *
     543 * Test the DMA engine by using it to write a
     544 * value to memory (VI).
    545 * Returns 0 for success, error for failure.
    546 */
    547static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
    548{
    549	struct amdgpu_device *adev = ring->adev;
    550	unsigned i;
    551	unsigned index;
    552	int r;
    553	u32 tmp;
    554	u64 gpu_addr;
    555
    556	r = amdgpu_device_wb_get(adev, &index);
    557	if (r)
    558		return r;
    559
    560	gpu_addr = adev->wb.gpu_addr + (index * 4);
    561	tmp = 0xCAFEDEAD;
    562	adev->wb.wb[index] = cpu_to_le32(tmp);
    563
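	/* the 5-dword WRITE_LINEAR packet below should overwrite the 0xCAFEDEAD seed with 0xDEADBEEF */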
    564	r = amdgpu_ring_alloc(ring, 5);
    565	if (r)
    566		goto error_free_wb;
    567
    568	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
    569			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
    570	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
    571	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
    572	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
    573	amdgpu_ring_write(ring, 0xDEADBEEF);
    574	amdgpu_ring_commit(ring);
    575
    576	for (i = 0; i < adev->usec_timeout; i++) {
    577		tmp = le32_to_cpu(adev->wb.wb[index]);
    578		if (tmp == 0xDEADBEEF)
    579			break;
    580		udelay(1);
    581	}
    582
    583	if (i >= adev->usec_timeout)
    584		r = -ETIMEDOUT;
    585
    586error_free_wb:
    587	amdgpu_device_wb_free(adev, index);
    588	return r;
    589}
    590
    591/**
    592 * sdma_v2_4_ring_test_ib - test an IB on the DMA engine
    593 *
    594 * @ring: amdgpu_ring structure holding ring information
    595 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
    596 *
    597 * Test a simple IB in the DMA ring (VI).
    598 * Returns 0 on success, error on failure.
    599 */
    600static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
    601{
    602	struct amdgpu_device *adev = ring->adev;
    603	struct amdgpu_ib ib;
    604	struct dma_fence *f = NULL;
    605	unsigned index;
    606	u32 tmp = 0;
    607	u64 gpu_addr;
    608	long r;
    609
    610	r = amdgpu_device_wb_get(adev, &index);
    611	if (r)
    612		return r;
    613
    614	gpu_addr = adev->wb.gpu_addr + (index * 4);
    615	tmp = 0xCAFEDEAD;
    616	adev->wb.wb[index] = cpu_to_le32(tmp);
    617	memset(&ib, 0, sizeof(ib));
    618	r = amdgpu_ib_get(adev, NULL, 256,
    619					AMDGPU_IB_POOL_DIRECT, &ib);
    620	if (r)
    621		goto err0;
    622
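	/* 8-dword IB: a 5-dword WRITE_LINEAR packet padded with three NOPs */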
    623	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
    624		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
    625	ib.ptr[1] = lower_32_bits(gpu_addr);
    626	ib.ptr[2] = upper_32_bits(gpu_addr);
    627	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
    628	ib.ptr[4] = 0xDEADBEEF;
    629	ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
    630	ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
    631	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
    632	ib.length_dw = 8;
    633
    634	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
    635	if (r)
    636		goto err1;
    637
    638	r = dma_fence_wait_timeout(f, false, timeout);
    639	if (r == 0) {
    640		r = -ETIMEDOUT;
    641		goto err1;
    642	} else if (r < 0) {
    643		goto err1;
    644	}
    645	tmp = le32_to_cpu(adev->wb.wb[index]);
    646	if (tmp == 0xDEADBEEF)
    647		r = 0;
    648	else
    649		r = -EINVAL;
    650
    651err1:
    652	amdgpu_ib_free(adev, &ib, NULL);
    653	dma_fence_put(f);
    654err0:
    655	amdgpu_device_wb_free(adev, index);
    656	return r;
    657}
    658
    659/**
    660 * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART
    661 *
    662 * @ib: indirect buffer to fill with commands
    663 * @pe: addr of the page entry
    664 * @src: src addr to copy from
    665 * @count: number of page entries to update
    666 *
    667 * Update PTEs by copying them from the GART using sDMA (CIK).
    668 */
    669static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
    670				  uint64_t pe, uint64_t src,
    671				  unsigned count)
    672{
    673	unsigned bytes = count * 8;
    674
    675	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
    676		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
    677	ib->ptr[ib->length_dw++] = bytes;
    678	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
    679	ib->ptr[ib->length_dw++] = lower_32_bits(src);
    680	ib->ptr[ib->length_dw++] = upper_32_bits(src);
    681	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
    682	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
    683}
    684
    685/**
    686 * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
    687 *
    688 * @ib: indirect buffer to fill with commands
    689 * @pe: addr of the page entry
    690 * @value: dst addr to write into pe
    691 * @count: number of page entries to update
    692 * @incr: increase next addr by incr bytes
    693 *
    694 * Update PTEs by writing them manually using sDMA (CIK).
    695 */
    696static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
    697				   uint64_t value, unsigned count,
    698				   uint32_t incr)
    699{
    700	unsigned ndw = count * 2;
    701
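	/* WRITE_LINEAR packet: destination address followed by ndw dwords, two per 64-bit PTE */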
    702	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
    703		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
    704	ib->ptr[ib->length_dw++] = pe;
    705	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
    706	ib->ptr[ib->length_dw++] = ndw;
    707	for (; ndw > 0; ndw -= 2) {
    708		ib->ptr[ib->length_dw++] = lower_32_bits(value);
    709		ib->ptr[ib->length_dw++] = upper_32_bits(value);
    710		value += incr;
    711	}
    712}
    713
    714/**
    715 * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA
    716 *
    717 * @ib: indirect buffer to fill with commands
    718 * @pe: addr of the page entry
    719 * @addr: dst addr to write into pe
    720 * @count: number of page entries to update
    721 * @incr: increase next addr by incr bytes
    722 * @flags: access flags
    723 *
    724 * Update the page tables using sDMA (CIK).
    725 */
    726static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
    727				     uint64_t addr, unsigned count,
    728				     uint32_t incr, uint64_t flags)
    729{
    730	/* for physically contiguous pages (vram) */
    731	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
    732	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
    733	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
    734	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
    735	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
    736	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
    737	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
    738	ib->ptr[ib->length_dw++] = incr; /* increment size */
    739	ib->ptr[ib->length_dw++] = 0;
    740	ib->ptr[ib->length_dw++] = count; /* number of entries */
    741}
    742
    743/**
    744 * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
    745 *
    746 * @ring: amdgpu_ring structure holding ring information
    747 * @ib: indirect buffer to fill with padding
    748 *
    749 */
    750static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
    751{
    752	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
    753	u32 pad_count;
    754	int i;
    755
    756	pad_count = (-ib->length_dw) & 7;
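	/* pad the IB up to the next multiple of 8 dwords, preferring a single burst NOP when supported */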
    757	for (i = 0; i < pad_count; i++)
    758		if (sdma && sdma->burst_nop && (i == 0))
    759			ib->ptr[ib->length_dw++] =
    760				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
    761				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
    762		else
    763			ib->ptr[ib->length_dw++] =
    764				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
    765}
    766
    767/**
    768 * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
    769 *
    770 * @ring: amdgpu_ring pointer
    771 *
    772 * Make sure all previous operations are completed (CIK).
    773 */
    774static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
    775{
    776	uint32_t seq = ring->fence_drv.sync_seq;
    777	uint64_t addr = ring->fence_drv.gpu_addr;
    778
    779	/* wait for idle */
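	/* poll the fence address until it reaches sync_seq (FUNC 3 = equal, full 32-bit mask) */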
    780	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
    781			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
    782			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
    783			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
    784	amdgpu_ring_write(ring, addr & 0xfffffffc);
    785	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
    786	amdgpu_ring_write(ring, seq); /* reference */
    787	amdgpu_ring_write(ring, 0xffffffff); /* mask */
    788	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
    789			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
    790}
    791
    792/**
    793 * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
    794 *
    795 * @ring: amdgpu_ring pointer
    796 * @vmid: vmid number to use
     797 * @pd_addr: page directory base address
    798 *
    799 * Update the page table base and flush the VM TLB
    800 * using sDMA (VI).
    801 */
    802static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
    803					 unsigned vmid, uint64_t pd_addr)
    804{
    805	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
    806
    807	/* wait for flush */
    808	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
    809			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
    810			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
    811	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
    812	amdgpu_ring_write(ring, 0);
    813	amdgpu_ring_write(ring, 0); /* reference */
    814	amdgpu_ring_write(ring, 0); /* mask */
    815	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
    816			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
    817}
    818
    819static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
    820				     uint32_t reg, uint32_t val)
    821{
    822	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
    823			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
    824	amdgpu_ring_write(ring, reg);
    825	amdgpu_ring_write(ring, val);
    826}
    827
    828static int sdma_v2_4_early_init(void *handle)
    829{
    830	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    831
    832	adev->sdma.num_instances = SDMA_MAX_INSTANCE;
    833
    834	sdma_v2_4_set_ring_funcs(adev);
    835	sdma_v2_4_set_buffer_funcs(adev);
    836	sdma_v2_4_set_vm_pte_funcs(adev);
    837	sdma_v2_4_set_irq_funcs(adev);
    838
    839	return 0;
    840}
    841
    842static int sdma_v2_4_sw_init(void *handle)
    843{
    844	struct amdgpu_ring *ring;
    845	int r, i;
    846	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    847
    848	/* SDMA trap event */
    849	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
    850			      &adev->sdma.trap_irq);
    851	if (r)
    852		return r;
    853
    854	/* SDMA Privileged inst */
    855	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
    856			      &adev->sdma.illegal_inst_irq);
    857	if (r)
    858		return r;
    859
     860	/* SDMA SRBM write (routed to the illegal instruction handler) */
    861	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
    862			      &adev->sdma.illegal_inst_irq);
    863	if (r)
    864		return r;
    865
    866	r = sdma_v2_4_init_microcode(adev);
    867	if (r) {
    868		DRM_ERROR("Failed to load sdma firmware!\n");
    869		return r;
    870	}
    871
    872	for (i = 0; i < adev->sdma.num_instances; i++) {
    873		ring = &adev->sdma.instance[i].ring;
    874		ring->ring_obj = NULL;
    875		ring->use_doorbell = false;
    876		sprintf(ring->name, "sdma%d", i);
    877		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
    878				     (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
    879				     AMDGPU_SDMA_IRQ_INSTANCE1,
    880				     AMDGPU_RING_PRIO_DEFAULT, NULL);
    881		if (r)
    882			return r;
    883	}
    884
    885	return r;
    886}
    887
    888static int sdma_v2_4_sw_fini(void *handle)
    889{
    890	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    891	int i;
    892
    893	for (i = 0; i < adev->sdma.num_instances; i++)
    894		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
    895
    896	sdma_v2_4_free_microcode(adev);
    897	return 0;
    898}
    899
    900static int sdma_v2_4_hw_init(void *handle)
    901{
    902	int r;
    903	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    904
    905	sdma_v2_4_init_golden_registers(adev);
    906
    907	r = sdma_v2_4_start(adev);
    908	if (r)
    909		return r;
    910
    911	return r;
    912}
    913
    914static int sdma_v2_4_hw_fini(void *handle)
    915{
    916	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    917
    918	sdma_v2_4_enable(adev, false);
    919
    920	return 0;
    921}
    922
    923static int sdma_v2_4_suspend(void *handle)
    924{
    925	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    926
    927	return sdma_v2_4_hw_fini(adev);
    928}
    929
    930static int sdma_v2_4_resume(void *handle)
    931{
    932	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    933
    934	return sdma_v2_4_hw_init(adev);
    935}
    936
    937static bool sdma_v2_4_is_idle(void *handle)
    938{
    939	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    940	u32 tmp = RREG32(mmSRBM_STATUS2);
    941
    942	if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
    943		   SRBM_STATUS2__SDMA1_BUSY_MASK))
    944	    return false;
    945
    946	return true;
    947}
    948
    949static int sdma_v2_4_wait_for_idle(void *handle)
    950{
    951	unsigned i;
    952	u32 tmp;
    953	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    954
    955	for (i = 0; i < adev->usec_timeout; i++) {
    956		tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
    957				SRBM_STATUS2__SDMA1_BUSY_MASK);
    958
    959		if (!tmp)
    960			return 0;
    961		udelay(1);
    962	}
    963	return -ETIMEDOUT;
    964}
    965
    966static int sdma_v2_4_soft_reset(void *handle)
    967{
    968	u32 srbm_soft_reset = 0;
    969	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    970	u32 tmp = RREG32(mmSRBM_STATUS2);
    971
    972	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
    973		/* sdma0 */
    974		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
    975		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
    976		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
    977		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
    978	}
    979	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
    980		/* sdma1 */
    981		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
    982		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
    983		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
    984		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
    985	}
    986
    987	if (srbm_soft_reset) {
    988		tmp = RREG32(mmSRBM_SOFT_RESET);
    989		tmp |= srbm_soft_reset;
    990		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
    991		WREG32(mmSRBM_SOFT_RESET, tmp);
    992		tmp = RREG32(mmSRBM_SOFT_RESET);
    993
    994		udelay(50);
    995
    996		tmp &= ~srbm_soft_reset;
    997		WREG32(mmSRBM_SOFT_RESET, tmp);
    998		tmp = RREG32(mmSRBM_SOFT_RESET);
    999
   1000		/* Wait a little for things to settle down */
   1001		udelay(50);
   1002	}
   1003
   1004	return 0;
   1005}
   1006
   1007static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev,
   1008					struct amdgpu_irq_src *src,
   1009					unsigned type,
   1010					enum amdgpu_interrupt_state state)
   1011{
   1012	u32 sdma_cntl;
   1013
   1014	switch (type) {
   1015	case AMDGPU_SDMA_IRQ_INSTANCE0:
   1016		switch (state) {
   1017		case AMDGPU_IRQ_STATE_DISABLE:
   1018			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
   1019			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
   1020			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
   1021			break;
   1022		case AMDGPU_IRQ_STATE_ENABLE:
   1023			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
   1024			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
   1025			WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
   1026			break;
   1027		default:
   1028			break;
   1029		}
   1030		break;
   1031	case AMDGPU_SDMA_IRQ_INSTANCE1:
   1032		switch (state) {
   1033		case AMDGPU_IRQ_STATE_DISABLE:
   1034			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
   1035			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
   1036			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
   1037			break;
   1038		case AMDGPU_IRQ_STATE_ENABLE:
   1039			sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
   1040			sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
   1041			WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
   1042			break;
   1043		default:
   1044			break;
   1045		}
   1046		break;
   1047	default:
   1048		break;
   1049	}
   1050	return 0;
   1051}
   1052
   1053static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
   1054				      struct amdgpu_irq_src *source,
   1055				      struct amdgpu_iv_entry *entry)
   1056{
   1057	u8 instance_id, queue_id;
   1058
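	/* ring_id encodes the SDMA instance in bits [1:0] and the queue in bits [3:2] */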
   1059	instance_id = (entry->ring_id & 0x3) >> 0;
   1060	queue_id = (entry->ring_id & 0xc) >> 2;
   1061	DRM_DEBUG("IH: SDMA trap\n");
   1062	switch (instance_id) {
   1063	case 0:
   1064		switch (queue_id) {
   1065		case 0:
   1066			amdgpu_fence_process(&adev->sdma.instance[0].ring);
   1067			break;
   1068		case 1:
   1069			/* XXX compute */
   1070			break;
   1071		case 2:
   1072			/* XXX compute */
   1073			break;
   1074		}
   1075		break;
   1076	case 1:
   1077		switch (queue_id) {
   1078		case 0:
   1079			amdgpu_fence_process(&adev->sdma.instance[1].ring);
   1080			break;
   1081		case 1:
   1082			/* XXX compute */
   1083			break;
   1084		case 2:
   1085			/* XXX compute */
   1086			break;
   1087		}
   1088		break;
   1089	}
   1090	return 0;
   1091}
   1092
   1093static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
   1094					      struct amdgpu_irq_src *source,
   1095					      struct amdgpu_iv_entry *entry)
   1096{
   1097	u8 instance_id, queue_id;
   1098
   1099	DRM_ERROR("Illegal instruction in SDMA command stream\n");
   1100	instance_id = (entry->ring_id & 0x3) >> 0;
   1101	queue_id = (entry->ring_id & 0xc) >> 2;
   1102
   1103	if (instance_id <= 1 && queue_id == 0)
   1104		drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
   1105	return 0;
   1106}
   1107
   1108static int sdma_v2_4_set_clockgating_state(void *handle,
   1109					  enum amd_clockgating_state state)
   1110{
   1111	/* XXX handled via the smc on VI */
   1112	return 0;
   1113}
   1114
   1115static int sdma_v2_4_set_powergating_state(void *handle,
   1116					  enum amd_powergating_state state)
   1117{
   1118	return 0;
   1119}
   1120
   1121static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
   1122	.name = "sdma_v2_4",
   1123	.early_init = sdma_v2_4_early_init,
   1124	.late_init = NULL,
   1125	.sw_init = sdma_v2_4_sw_init,
   1126	.sw_fini = sdma_v2_4_sw_fini,
   1127	.hw_init = sdma_v2_4_hw_init,
   1128	.hw_fini = sdma_v2_4_hw_fini,
   1129	.suspend = sdma_v2_4_suspend,
   1130	.resume = sdma_v2_4_resume,
   1131	.is_idle = sdma_v2_4_is_idle,
   1132	.wait_for_idle = sdma_v2_4_wait_for_idle,
   1133	.soft_reset = sdma_v2_4_soft_reset,
   1134	.set_clockgating_state = sdma_v2_4_set_clockgating_state,
   1135	.set_powergating_state = sdma_v2_4_set_powergating_state,
   1136};
   1137
   1138static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
   1139	.type = AMDGPU_RING_TYPE_SDMA,
   1140	.align_mask = 0xf,
   1141	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
   1142	.support_64bit_ptrs = false,
   1143	.secure_submission_supported = true,
   1144	.get_rptr = sdma_v2_4_ring_get_rptr,
   1145	.get_wptr = sdma_v2_4_ring_get_wptr,
   1146	.set_wptr = sdma_v2_4_ring_set_wptr,
   1147	.emit_frame_size =
   1148		6 + /* sdma_v2_4_ring_emit_hdp_flush */
   1149		3 + /* hdp invalidate */
   1150		6 + /* sdma_v2_4_ring_emit_pipeline_sync */
   1151		VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
   1152		10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
   1153	.emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
   1154	.emit_ib = sdma_v2_4_ring_emit_ib,
   1155	.emit_fence = sdma_v2_4_ring_emit_fence,
   1156	.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
   1157	.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
   1158	.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
   1159	.test_ring = sdma_v2_4_ring_test_ring,
   1160	.test_ib = sdma_v2_4_ring_test_ib,
   1161	.insert_nop = sdma_v2_4_ring_insert_nop,
   1162	.pad_ib = sdma_v2_4_ring_pad_ib,
   1163	.emit_wreg = sdma_v2_4_ring_emit_wreg,
   1164};
   1165
   1166static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
   1167{
   1168	int i;
   1169
   1170	for (i = 0; i < adev->sdma.num_instances; i++) {
   1171		adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
   1172		adev->sdma.instance[i].ring.me = i;
   1173	}
   1174}
   1175
   1176static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
   1177	.set = sdma_v2_4_set_trap_irq_state,
   1178	.process = sdma_v2_4_process_trap_irq,
   1179};
   1180
   1181static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
   1182	.process = sdma_v2_4_process_illegal_inst_irq,
   1183};
   1184
   1185static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
   1186{
   1187	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
   1188	adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
   1189	adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
   1190}
   1191
   1192/**
   1193 * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine
   1194 *
   1195 * @ib: indirect buffer to copy to
   1196 * @src_offset: src GPU address
   1197 * @dst_offset: dst GPU address
   1198 * @byte_count: number of bytes to xfer
   1199 * @tmz: unused
   1200 *
   1201 * Copy GPU buffers using the DMA engine (VI).
   1202 * Used by the amdgpu ttm implementation to move pages if
   1203 * registered as the asic copy callback.
   1204 */
   1205static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
   1206				       uint64_t src_offset,
   1207				       uint64_t dst_offset,
   1208				       uint32_t byte_count,
   1209				       bool tmz)
   1210{
   1211	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
   1212		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
   1213	ib->ptr[ib->length_dw++] = byte_count;
   1214	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
   1215	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
   1216	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
   1217	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
   1218	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
   1219}
   1220
   1221/**
   1222 * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine
   1223 *
   1224 * @ib: indirect buffer to copy to
   1225 * @src_data: value to write to buffer
   1226 * @dst_offset: dst GPU address
   1227 * @byte_count: number of bytes to xfer
   1228 *
   1229 * Fill GPU buffers using the DMA engine (VI).
   1230 */
   1231static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
   1232				       uint32_t src_data,
   1233				       uint64_t dst_offset,
   1234				       uint32_t byte_count)
   1235{
   1236	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
   1237	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
   1238	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
   1239	ib->ptr[ib->length_dw++] = src_data;
   1240	ib->ptr[ib->length_dw++] = byte_count;
   1241}
   1242
   1243static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
   1244	.copy_max_bytes = 0x1fffff,
   1245	.copy_num_dw = 7,
   1246	.emit_copy_buffer = sdma_v2_4_emit_copy_buffer,
   1247
   1248	.fill_max_bytes = 0x1fffff,
   1249	.fill_num_dw = 7,
   1250	.emit_fill_buffer = sdma_v2_4_emit_fill_buffer,
   1251};
   1252
   1253static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
   1254{
   1255	adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
   1256	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
   1257}
   1258
   1259static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
   1260	.copy_pte_num_dw = 7,
   1261	.copy_pte = sdma_v2_4_vm_copy_pte,
   1262
   1263	.write_pte = sdma_v2_4_vm_write_pte,
   1264	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
   1265};
   1266
   1267static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
   1268{
   1269	unsigned i;
   1270
   1271	adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
   1272	for (i = 0; i < adev->sdma.num_instances; i++) {
   1273		adev->vm_manager.vm_pte_scheds[i] =
   1274			&adev->sdma.instance[i].ring.sched;
   1275	}
   1276	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
   1277}
   1278
   1279const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
   1280{
   1281	.type = AMD_IP_BLOCK_TYPE_SDMA,
   1282	.major = 2,
   1283	.minor = 4,
   1284	.rev = 0,
   1285	.funcs = &sdma_v2_4_ip_funcs,
   1286};