cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gfx_v9_0.c (246872B)


/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_1_ARCT                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_2_ARCT                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_3_ARCT                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_4_ARCT                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                0
#define mmTCP_CHAN_STEER_5_ARCT                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

enum ta_ras_gfx_subblock {
        /* CPC */
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF */
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG */
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS */
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI */
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ */
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges) */
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0 */
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1 */
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2 */
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA */
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA */
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges) */
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0 */
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1 */
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2 */
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3 */
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4 */
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI */
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP */
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD */
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges) */
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0 */
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1 */
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2 */
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank */
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker */
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

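/*
 * Build one ras_gfx_subblock table entry.  The subblock name token is
 * stringified for the name field, used as the AMDGPU_RAS_BLOCK__ array
 * index, and mapped to the matching TA_RAS_BLOCK__ id; the flags a-d and
 * e-h are packed bitwise into hw_supported_error_type and
 * sw_supported_error_type.  For example,
 * AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1) expands to
 * { "GFX_CPC_SCRATCH", TA_RAS_BLOCK__GFX_CPC_SCRATCH, 0xe, 0x6 }.
 */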
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

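/*
 * "Golden" register settings, applied once at init.  Each
 * SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) entry tells
 * soc15_program_register_sequence() to rewrite only the field selected by
 * and_mask, replacing those bits with or_mask and leaving the rest of the
 * register untouched.
 */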
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

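/*
 * Hand the KIQ (kernel interface queue) the set of compute queues it may
 * manage: a SET_RESOURCES packet carrying the 64-bit queue mask, with the
 * GWS, OAC and GDS fields left at zero.
 */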
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

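/*
 * Map a ring onto hardware through the KIQ: the MAP_QUEUES packet encodes
 * the ring's me/pipe/queue coordinates and doorbell offset, plus the GPU
 * addresses of its MQD and write pointer.
 */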
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = ring->wptr_gpu_addr;
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

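/*
 * Unmap a queue through the KIQ.  For PREEMPT_QUEUES_NO_UNMAP the trailing
 * words carry a fence address and sequence number so the preemption can be
 * waited on; for other actions they are zero.
 */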
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* action, queue_sel: 0, eng_sel, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

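/*
 * Ask the KIQ for the status of the queue behind the given doorbell; the
 * response is written to the supplied GPU address, tagged with seq.
 */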
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* doorbell offset and engine select */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

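/*
 * Flush the TLB entries belonging to a PASID via the KIQ, on either one
 * or all VM hubs depending on all_hub.
 */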
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

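/*
 * Program the golden register tables that match the GC IP version.  Renoir
 * (9.3.0) returns early because it does not use the common table, and
 * Arcturus (9.4.1) / Aldebaran (9.4.2) are likewise excluded from the
 * common sequence below.
 */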
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(9, 0, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case IP_VERSION(9, 2, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case IP_VERSION(9, 4, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case IP_VERSION(9, 4, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case IP_VERSION(9, 2, 2):
        case IP_VERSION(9, 1, 0):
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case IP_VERSION(9, 3, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        case IP_VERSION(9, 4, 2):
                gfx_v9_4_2_init_golden_registers(adev,
                                                 adev->smuio.funcs->get_die_id(adev));
                break;
        default:
                break;
        }

        if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
            (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

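/*
 * Write one register from the ring with a WRITE_DATA packet; wc requests a
 * write confirmation so subsequent packets only run after the write has
 * landed.
 */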
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

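/*
 * Emit a WAIT_REG_MEM packet: stall until the dword at addr0/addr1 (memory
 * if mem_space, else a register) masked with mask equals ref, polling at
 * the interval given by inv.
 */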
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

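/*
 * Basic ring sanity test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * packet that writes 0xDEADBEEF to it, and poll until the new value shows
 * up or adev->usec_timeout expires.
 */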
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                return r;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
                          PACKET3_SET_UCONFIG_REG_START);
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;
        return r;
}

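/*
 * Indirect-buffer test: build a five-dword IB that writes 0xDEADBEEF to a
 * writeback slot, schedule it behind a fence, and check the slot once the
 * fence signals.
 */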
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16,
                                        AMDGPU_IB_POOL_DIRECT, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

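/*
 * Pick the extended (v2.1) RLC firmware sections out of the header:
 * version, size and location of the save/restore list CNTL, GPM and SRM
 * blobs, plus the direct register list length.
 */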
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

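/*
 * Decide, per IP version, whether the ME/MEC firmware is recent enough for
 * the driver's register write-then-wait optimization, and warn once when
 * the CP firmware is older than the recommended baseline.
 */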
   1113static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
   1114{
   1115	adev->gfx.me_fw_write_wait = false;
   1116	adev->gfx.mec_fw_write_wait = false;
   1117
   1118	if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
   1119	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
   1120	    (adev->gfx.mec_feature_version < 46) ||
   1121	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
   1122	    (adev->gfx.pfp_feature_version < 46)))
   1123		DRM_WARN_ONCE("CP firmware version too old, please update!");
   1124
   1125	switch (adev->ip_versions[GC_HWIP][0]) {
   1126	case IP_VERSION(9, 0, 1):
   1127		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
   1128		    (adev->gfx.me_feature_version >= 42) &&
   1129		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
   1130		    (adev->gfx.pfp_feature_version >= 42))
   1131			adev->gfx.me_fw_write_wait = true;
   1132
   1133		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
   1134		    (adev->gfx.mec_feature_version >= 42))
   1135			adev->gfx.mec_fw_write_wait = true;
   1136		break;
   1137	case IP_VERSION(9, 2, 1):
   1138		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
   1139		    (adev->gfx.me_feature_version >= 44) &&
   1140		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
   1141		    (adev->gfx.pfp_feature_version >= 44))
   1142			adev->gfx.me_fw_write_wait = true;
   1143
   1144		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
   1145		    (adev->gfx.mec_feature_version >= 44))
   1146			adev->gfx.mec_fw_write_wait = true;
   1147		break;
   1148	case IP_VERSION(9, 4, 0):
   1149		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
   1150		    (adev->gfx.me_feature_version >= 44) &&
   1151		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
   1152		    (adev->gfx.pfp_feature_version >= 44))
   1153			adev->gfx.me_fw_write_wait = true;
   1154
   1155		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
   1156		    (adev->gfx.mec_feature_version >= 44))
   1157			adev->gfx.mec_fw_write_wait = true;
   1158		break;
   1159	case IP_VERSION(9, 1, 0):
   1160	case IP_VERSION(9, 2, 2):
   1161		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
   1162		    (adev->gfx.me_feature_version >= 42) &&
   1163		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
   1164		    (adev->gfx.pfp_feature_version >= 42))
   1165			adev->gfx.me_fw_write_wait = true;
   1166
   1167		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
   1168		    (adev->gfx.mec_feature_version >= 42))
   1169			adev->gfx.mec_fw_write_wait = true;
   1170		break;
   1171	default:
   1172		adev->gfx.me_fw_write_wait = true;
   1173		adev->gfx.mec_fw_write_wait = true;
   1174		break;
   1175	}
   1176}
   1177
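        /*
         * Boards on which GFXOFF is known to be broken, matched by PCI
         * vendor/device/subsystem IDs and revision.
         */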
   1178struct amdgpu_gfxoff_quirk {
   1179	u16 chip_vendor;
   1180	u16 chip_device;
   1181	u16 subsys_vendor;
   1182	u16 subsys_device;
   1183	u8 revision;
   1184};
   1185
   1186static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
   1187	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
   1188	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
   1189	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
   1190	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
   1191	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
   1192	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
   1193	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
   1194	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
   1195	{ 0, 0, 0, 0, 0 },
   1196};
   1197
   1198static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
   1199{
   1200	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
   1201
   1202	while (p && p->chip_device != 0) {
   1203		if (pdev->vendor == p->chip_vendor &&
   1204		    pdev->device == p->chip_device &&
   1205		    pdev->subsystem_vendor == p->subsys_vendor &&
   1206		    pdev->subsystem_device == p->subsys_device &&
   1207		    pdev->revision == p->revision) {
   1208			return true;
   1209		}
   1210		++p;
   1211	}
   1212	return false;
   1213}
   1214
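        /* Raven parts running SMU firmware 0x41e2b or newer are "kicker" revisions. */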
   1215static bool is_raven_kicker(struct amdgpu_device *adev)
   1216{
   1217	if (adev->pm.fw_version >= 0x41e2b)
   1218		return true;
   1219	else
   1220		return false;
   1221}
   1222
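        /*
         * GC 9.3.0 with new enough ME firmware expects an enlarged CP
         * doorbell range; callers consult this when programming doorbells.
         */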
   1223static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
   1224{
   1225	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
   1226	    (adev->gfx.me_fw_version >= 0x000000a5) &&
   1227	    (adev->gfx.me_feature_version >= 52))
   1228		return true;
   1229	else
   1230		return false;
   1231}
   1232
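        /*
         * Disable GFXOFF on quirked boards and on Raven-family parts whose
         * RLC firmware is too old to support it; otherwise advertise the
         * matching powergating flags.
         */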
   1233static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
   1234{
   1235	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
   1236		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
   1237
   1238	switch (adev->ip_versions[GC_HWIP][0]) {
   1239	case IP_VERSION(9, 0, 1):
   1240	case IP_VERSION(9, 2, 1):
   1241	case IP_VERSION(9, 4, 0):
   1242		break;
   1243	case IP_VERSION(9, 2, 2):
   1244	case IP_VERSION(9, 1, 0):
   1245		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
   1246		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
   1247		    ((!is_raven_kicker(adev) &&
   1248		      adev->gfx.rlc_fw_version < 531) ||
   1249		     (adev->gfx.rlc_feature_version < 1) ||
   1250		     !adev->gfx.rlc.is_rlc_v2_1))
   1251			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
   1252
   1253		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
   1254			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
   1255				AMD_PG_SUPPORT_CP |
   1256				AMD_PG_SUPPORT_RLC_SMU_HS;
   1257		break;
   1258	case IP_VERSION(9, 3, 0):
   1259		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
   1260			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
   1261				AMD_PG_SUPPORT_CP |
   1262				AMD_PG_SUPPORT_RLC_SMU_HS;
   1263		break;
   1264	default:
   1265		break;
   1266	}
   1267}
   1268
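        /*
         * Fetch and validate the PFP, ME and CE microcode for @chip_name,
         * record the ucode/feature versions and, when the PSP loads firmware,
         * add each image to the firmware list.
         */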
   1269static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
   1270					  const char *chip_name)
   1271{
   1272	char fw_name[30];
   1273	int err;
   1274	struct amdgpu_firmware_info *info = NULL;
   1275	const struct common_firmware_header *header = NULL;
   1276	const struct gfx_firmware_header_v1_0 *cp_hdr;
   1277
   1278	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
   1279	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
   1280	if (err)
   1281		goto out;
   1282	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
   1283	if (err)
   1284		goto out;
   1285	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
   1286	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1287	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1288
   1289	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
   1290	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
   1291	if (err)
   1292		goto out;
   1293	err = amdgpu_ucode_validate(adev->gfx.me_fw);
   1294	if (err)
   1295		goto out;
   1296	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
   1297	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1298	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1299
   1300	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
   1301	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
   1302	if (err)
   1303		goto out;
   1304	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
   1305	if (err)
   1306		goto out;
   1307	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
   1308	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1309	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1310
   1311	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
   1312		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
   1313		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
   1314		info->fw = adev->gfx.pfp_fw;
   1315		header = (const struct common_firmware_header *)info->fw->data;
   1316		adev->firmware.fw_size +=
   1317			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1318
   1319		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
   1320		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
   1321		info->fw = adev->gfx.me_fw;
   1322		header = (const struct common_firmware_header *)info->fw->data;
   1323		adev->firmware.fw_size +=
   1324			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1325
   1326		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
   1327		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
   1328		info->fw = adev->gfx.ce_fw;
   1329		header = (const struct common_firmware_header *)info->fw->data;
   1330		adev->firmware.fw_size +=
   1331			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1332	}
   1333
   1334out:
   1335	if (err) {
   1336		dev_err(adev->dev,
   1337			"gfx9: Failed to load firmware \"%s\"\n",
   1338			fw_name);
   1339		release_firmware(adev->gfx.pfp_fw);
   1340		adev->gfx.pfp_fw = NULL;
   1341		release_firmware(adev->gfx.me_fw);
   1342		adev->gfx.me_fw = NULL;
   1343		release_firmware(adev->gfx.ce_fw);
   1344		adev->gfx.ce_fw = NULL;
   1345	}
   1346	return err;
   1347}
   1348
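        /*
         * Fetch the RLC microcode, picking the AM4-specific Picasso image or
         * the Raven "kicker" image where required, then parse the header and
         * copy out the register save/restore lists.
         */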
   1349static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
   1350					  const char *chip_name)
   1351{
   1352	char fw_name[30];
   1353	int err;
   1354	struct amdgpu_firmware_info *info = NULL;
   1355	const struct common_firmware_header *header = NULL;
   1356	const struct rlc_firmware_header_v2_0 *rlc_hdr;
   1357	unsigned int *tmp = NULL;
   1358	unsigned int i = 0;
   1359	uint16_t version_major;
   1360	uint16_t version_minor;
   1361	uint32_t smu_version;
   1362
    1363	/*
    1364	 * For Picasso on an AM4 socket board we use picasso_rlc_am4.bin
    1365	 * instead of picasso_rlc.bin.
    1366	 * Telling the two apart:
    1367	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
    1368	 *          or revision >= 0xD8 && revision <= 0xDF,
    1369	 * otherwise the part is PCO FP5.
    1370	 */
   1371	if (!strcmp(chip_name, "picasso") &&
   1372		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
   1373		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
   1374		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
   1375	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
   1376		(smu_version >= 0x41e2b))
    1377		/*
    1378		 * SMC is loaded by the SBIOS on APUs; query the SMU version directly.
    1379		 */
   1380		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
   1381	else
   1382		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
   1383	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
   1384	if (err)
   1385		goto out;
    1386	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        	if (err)
        		goto out;
    1387	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
   1388
   1389	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
   1390	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
   1391	if (version_major == 2 && version_minor == 1)
   1392		adev->gfx.rlc.is_rlc_v2_1 = true;
   1393
   1394	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
   1395	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
   1396	adev->gfx.rlc.save_and_restore_offset =
   1397			le32_to_cpu(rlc_hdr->save_and_restore_offset);
   1398	adev->gfx.rlc.clear_state_descriptor_offset =
   1399			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
   1400	adev->gfx.rlc.avail_scratch_ram_locations =
   1401			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
   1402	adev->gfx.rlc.reg_restore_list_size =
   1403			le32_to_cpu(rlc_hdr->reg_restore_list_size);
   1404	adev->gfx.rlc.reg_list_format_start =
   1405			le32_to_cpu(rlc_hdr->reg_list_format_start);
   1406	adev->gfx.rlc.reg_list_format_separate_start =
   1407			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
   1408	adev->gfx.rlc.starting_offsets_start =
   1409			le32_to_cpu(rlc_hdr->starting_offsets_start);
   1410	adev->gfx.rlc.reg_list_format_size_bytes =
   1411			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
   1412	adev->gfx.rlc.reg_list_size_bytes =
   1413			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
   1414	adev->gfx.rlc.register_list_format =
   1415			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
   1416				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
   1417	if (!adev->gfx.rlc.register_list_format) {
   1418		err = -ENOMEM;
   1419		goto out;
   1420	}
   1421
   1422	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
   1423			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
   1424	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
    1425		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
   1426
   1427	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
   1428
   1429	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
   1430			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
   1431	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
   1432		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
   1433
   1434	if (adev->gfx.rlc.is_rlc_v2_1)
   1435		gfx_v9_0_init_rlc_ext_microcode(adev);
   1436
   1437	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
   1438		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
   1439		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
   1440		info->fw = adev->gfx.rlc_fw;
   1441		header = (const struct common_firmware_header *)info->fw->data;
   1442		adev->firmware.fw_size +=
   1443			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1444
   1445		if (adev->gfx.rlc.is_rlc_v2_1 &&
   1446		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
   1447		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
   1448		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
   1449			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
   1450			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
   1451			info->fw = adev->gfx.rlc_fw;
   1452			adev->firmware.fw_size +=
   1453				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
   1454
   1455			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
   1456			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
   1457			info->fw = adev->gfx.rlc_fw;
   1458			adev->firmware.fw_size +=
   1459				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
   1460
   1461			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
   1462			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
   1463			info->fw = adev->gfx.rlc_fw;
   1464			adev->firmware.fw_size +=
   1465				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
   1466		}
   1467	}
   1468
   1469out:
   1470	if (err) {
   1471		dev_err(adev->dev,
   1472			"gfx9: Failed to load firmware \"%s\"\n",
   1473			fw_name);
   1474		release_firmware(adev->gfx.rlc_fw);
   1475		adev->gfx.rlc_fw = NULL;
   1476	}
   1477	return err;
   1478}
   1479
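        /*
         * Aldebaran, Arcturus and Renoir ship no separate MEC2 image; on
         * those parts MEC2 reuses the MEC1 firmware.
         */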
   1480static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
   1481{
   1482	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
   1483	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
   1484	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
   1485		return false;
   1486
   1487	return true;
   1488}
   1489
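        /*
         * Fetch and validate the MEC (and, where supported, MEC2) microcode
         * and register the images plus their jump tables for PSP loading.
         */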
   1490static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
   1491					  const char *chip_name)
   1492{
   1493	char fw_name[30];
   1494	int err;
   1495	struct amdgpu_firmware_info *info = NULL;
   1496	const struct common_firmware_header *header = NULL;
   1497	const struct gfx_firmware_header_v1_0 *cp_hdr;
   1498
   1499	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
   1500	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
   1501	if (err)
   1502		goto out;
   1503	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
   1504	if (err)
   1505		goto out;
   1506	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   1507	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1508	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
    1509
   1511	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
   1512		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
   1513		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
   1514		if (!err) {
   1515			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
   1516			if (err)
   1517				goto out;
   1518			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
   1519			adev->gfx.mec2_fw->data;
   1520			adev->gfx.mec2_fw_version =
   1521			le32_to_cpu(cp_hdr->header.ucode_version);
   1522			adev->gfx.mec2_feature_version =
   1523			le32_to_cpu(cp_hdr->ucode_feature_version);
   1524		} else {
   1525			err = 0;
   1526			adev->gfx.mec2_fw = NULL;
   1527		}
   1528	} else {
   1529		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
   1530		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
   1531	}
   1532
   1533	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
   1534		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
   1535		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
   1536		info->fw = adev->gfx.mec_fw;
   1537		header = (const struct common_firmware_header *)info->fw->data;
   1538		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
   1539		adev->firmware.fw_size +=
   1540			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
   1541
   1542		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
   1543		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
   1544		info->fw = adev->gfx.mec_fw;
   1545		adev->firmware.fw_size +=
   1546			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
   1547
   1548		if (adev->gfx.mec2_fw) {
   1549			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
   1550			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
   1551			info->fw = adev->gfx.mec2_fw;
   1552			header = (const struct common_firmware_header *)info->fw->data;
   1553			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
   1554			adev->firmware.fw_size +=
   1555				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
   1556
    1557			/* TODO: Determine if MEC2 JT FW loading can be removed
    1558			 * for all GFX v9 ASICs and above. */
   1559			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
   1560				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
   1561				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
   1562				info->fw = adev->gfx.mec2_fw;
   1563				adev->firmware.fw_size +=
   1564					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
   1565					PAGE_SIZE);
   1566			}
   1567		}
   1568	}
   1569
   1570out:
   1571	gfx_v9_0_check_if_need_gfxoff(adev);
   1572	gfx_v9_0_check_fw_write_wait(adev);
   1573	if (err) {
   1574		dev_err(adev->dev,
   1575			"gfx9: Failed to load firmware \"%s\"\n",
   1576			fw_name);
   1577		release_firmware(adev->gfx.mec_fw);
   1578		adev->gfx.mec_fw = NULL;
   1579		release_firmware(adev->gfx.mec2_fw);
   1580		adev->gfx.mec2_fw = NULL;
   1581	}
   1582	return err;
   1583}
   1584
   1585static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
   1586{
   1587	const char *chip_name;
   1588	int r;
   1589
   1590	DRM_DEBUG("\n");
   1591
   1592	switch (adev->ip_versions[GC_HWIP][0]) {
   1593	case IP_VERSION(9, 0, 1):
   1594		chip_name = "vega10";
   1595		break;
   1596	case IP_VERSION(9, 2, 1):
   1597		chip_name = "vega12";
   1598		break;
   1599	case IP_VERSION(9, 4, 0):
   1600		chip_name = "vega20";
   1601		break;
   1602	case IP_VERSION(9, 2, 2):
   1603	case IP_VERSION(9, 1, 0):
   1604		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
   1605			chip_name = "raven2";
   1606		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
   1607			chip_name = "picasso";
   1608		else
   1609			chip_name = "raven";
   1610		break;
   1611	case IP_VERSION(9, 4, 1):
   1612		chip_name = "arcturus";
   1613		break;
   1614	case IP_VERSION(9, 3, 0):
   1615		if (adev->apu_flags & AMD_APU_IS_RENOIR)
   1616			chip_name = "renoir";
   1617		else
   1618			chip_name = "green_sardine";
   1619		break;
   1620	case IP_VERSION(9, 4, 2):
   1621		chip_name = "aldebaran";
   1622		break;
   1623	default:
   1624		BUG();
   1625	}
   1626
   1627	/* No CPG in Arcturus */
   1628	if (adev->gfx.num_gfx_rings) {
   1629		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
   1630		if (r)
   1631			return r;
   1632	}
   1633
   1634	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
   1635	if (r)
   1636		return r;
   1637
   1638	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
   1639	if (r)
   1640		return r;
   1641
   1642	return r;
   1643}
   1644
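        /*
         * Clear-state buffer size in dwords: preamble (2) + context control
         * (3) + a SET_CONTEXT_REG header pair and payload per extent +
         * end-of-clear-state (2) + the CLEAR_STATE packet itself (2).
         */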
   1645static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
   1646{
   1647	u32 count = 0;
   1648	const struct cs_section_def *sect = NULL;
   1649	const struct cs_extent_def *ext = NULL;
   1650
   1651	/* begin clear state */
   1652	count += 2;
   1653	/* context control state */
   1654	count += 3;
   1655
   1656	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
   1657		for (ext = sect->section; ext->extent != NULL; ++ext) {
   1658			if (sect->id == SECT_CONTEXT)
   1659				count += 2 + ext->reg_count;
   1660			else
   1661				return 0;
   1662		}
   1663	}
   1664
   1665	/* end clear state */
   1666	count += 2;
   1667	/* clear state */
   1668	count += 2;
   1669
   1670	return count;
   1671}
   1672
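        /*
         * Fill @buffer with the PM4 clear-state stream sized by
         * gfx_v9_0_get_csb_size() above.
         */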
   1673static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
   1674				    volatile u32 *buffer)
   1675{
   1676	u32 count = 0, i;
   1677	const struct cs_section_def *sect = NULL;
   1678	const struct cs_extent_def *ext = NULL;
   1679
   1680	if (adev->gfx.rlc.cs_data == NULL)
   1681		return;
   1682	if (buffer == NULL)
   1683		return;
   1684
   1685	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   1686	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   1687
   1688	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   1689	buffer[count++] = cpu_to_le32(0x80000000);
   1690	buffer[count++] = cpu_to_le32(0x80000000);
   1691
   1692	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
   1693		for (ext = sect->section; ext->extent != NULL; ++ext) {
   1694			if (sect->id == SECT_CONTEXT) {
   1695				buffer[count++] =
   1696					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
   1697				buffer[count++] = cpu_to_le32(ext->reg_index -
   1698						PACKET3_SET_CONTEXT_REG_START);
   1699				for (i = 0; i < ext->reg_count; i++)
   1700					buffer[count++] = cpu_to_le32(ext->extent[i]);
   1701			} else {
   1702				return;
   1703			}
   1704		}
   1705	}
   1706
   1707	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   1708	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
   1709
   1710	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
   1711	buffer[count++] = cpu_to_le32(0);
   1712}
   1713
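        /*
         * Program, per SE/SH, the mask of CUs that must stay powered: 4 on
         * APUs, 8 on Vega12, 12 otherwise; the first two enabled CUs are
         * additionally marked always-on for powergating.
         */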
   1714static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
   1715{
   1716	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
   1717	uint32_t pg_always_on_cu_num = 2;
   1718	uint32_t always_on_cu_num;
   1719	uint32_t i, j, k;
   1720	uint32_t mask, cu_bitmap, counter;
   1721
   1722	if (adev->flags & AMD_IS_APU)
   1723		always_on_cu_num = 4;
   1724	else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
   1725		always_on_cu_num = 8;
   1726	else
   1727		always_on_cu_num = 12;
   1728
   1729	mutex_lock(&adev->grbm_idx_mutex);
   1730	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   1731		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   1732			mask = 1;
   1733			cu_bitmap = 0;
   1734			counter = 0;
   1735			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
   1736
    1737			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
   1738				if (cu_info->bitmap[i][j] & mask) {
   1739					if (counter == pg_always_on_cu_num)
   1740						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
   1741					if (counter < always_on_cu_num)
   1742						cu_bitmap |= mask;
   1743					else
   1744						break;
   1745					counter++;
   1746				}
   1747				mask <<= 1;
   1748			}
   1749
   1750			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
   1751			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
   1752		}
   1753	}
   1754	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1755	mutex_unlock(&adev->grbm_idx_mutex);
   1756}
   1757
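        /*
         * Load-balancing-per-watt (LBPW) tuning for Raven-class parts
         * (GC 9.1.0/9.2.2): RLC thresholds, sample intervals and the init
         * CU mask.
         */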
   1758static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
   1759{
   1760	uint32_t data;
   1761
   1762	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
   1763	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
   1764	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
   1765	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
   1766	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
   1767
   1768	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
   1769	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
   1770
   1771	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
   1772	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
   1773
   1774	mutex_lock(&adev->grbm_idx_mutex);
    1775	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
   1776	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1777	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
   1778
   1779	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
   1780	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
   1781	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
   1782	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
   1783	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
   1784
   1785	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
   1786	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
   1787	data &= 0x0000FFFF;
   1788	data |= 0x00C00000;
   1789	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
   1790
   1791	/*
   1792	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
   1793	 * programmed in gfx_v9_0_init_always_on_cu_mask()
   1794	 */
   1795
    1796	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
    1797	 * but used here for RLC_LB_CNTL configuration */
   1798	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
   1799	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
   1800	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
   1801	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
   1802	mutex_unlock(&adev->grbm_idx_mutex);
   1803
   1804	gfx_v9_0_init_always_on_cu_mask(adev);
   1805}
   1806
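        /* The Vega20 (GC 9.4.0) variant of the LBPW setup above. */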
   1807static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
   1808{
   1809	uint32_t data;
   1810
   1811	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
   1812	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
   1813	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
   1814	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
   1815	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
   1816
   1817	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
   1818	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
   1819
   1820	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
   1821	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
   1822
   1823	mutex_lock(&adev->grbm_idx_mutex);
    1824	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
   1825	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1826	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
   1827
   1828	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
   1829	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
   1830	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
   1831	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
   1832	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
   1833
   1834	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
   1835	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
   1836	data &= 0x0000FFFF;
   1837	data |= 0x00C00000;
   1838	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
   1839
   1840	/*
   1841	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
   1842	 * programmed in gfx_v9_0_init_always_on_cu_mask()
   1843	 */
   1844
    1845	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
    1846	 * but used here for RLC_LB_CNTL configuration */
   1847	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
   1848	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
   1849	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
   1850	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
   1851	mutex_unlock(&adev->grbm_idx_mutex);
   1852
   1853	gfx_v9_0_init_always_on_cu_mask(adev);
   1854}
   1855
   1856static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
   1857{
   1858	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
   1859}
   1860
   1861static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
   1862{
   1863	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
   1864		return 5;
   1865	else
   1866		return 4;
   1867}
   1868
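        /*
         * Record the scratch and GRBM control register offsets used for
         * RLC-gated (RLCG) indirect register access.
         */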
   1869static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
   1870{
   1871	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
   1872
   1873	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
   1874	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
   1875	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
   1876	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
   1877	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
   1878	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
   1879	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
   1880	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
   1881	adev->gfx.rlc.rlcg_reg_access_supported = true;
   1882}
   1883
   1884static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
   1885{
   1886	const struct cs_section_def *cs_data;
   1887	int r;
   1888
   1889	adev->gfx.rlc.cs_data = gfx9_cs_data;
   1890
   1891	cs_data = adev->gfx.rlc.cs_data;
   1892
   1893	if (cs_data) {
   1894		/* init clear state block */
   1895		r = amdgpu_gfx_rlc_init_csb(adev);
   1896		if (r)
   1897			return r;
   1898	}
   1899
   1900	if (adev->flags & AMD_IS_APU) {
   1901		/* TODO: double check the cp_table_size for RV */
   1902		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
   1903		r = amdgpu_gfx_rlc_init_cpt(adev);
   1904		if (r)
   1905			return r;
   1906	}
   1907
   1908	switch (adev->ip_versions[GC_HWIP][0]) {
   1909	case IP_VERSION(9, 2, 2):
   1910	case IP_VERSION(9, 1, 0):
   1911		gfx_v9_0_init_lbpw(adev);
   1912		break;
   1913	case IP_VERSION(9, 4, 0):
   1914		gfx_v9_4_init_lbpw(adev);
   1915		break;
   1916	default:
   1917		break;
   1918	}
   1919
   1920	/* init spm vmid with 0xf */
   1921	if (adev->gfx.rlc.funcs->update_spm_vmid)
   1922		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
   1923
   1924	return 0;
   1925}
   1926
   1927static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
   1928{
   1929	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
   1930	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
   1931}
   1932
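        /*
         * Allocate the HPD EOP buffer backing all acquired compute queues
         * and stage a copy of the MEC firmware in a GTT BO.
         */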
   1933static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
   1934{
   1935	int r;
   1936	u32 *hpd;
   1937	const __le32 *fw_data;
   1938	unsigned fw_size;
   1939	u32 *fw;
   1940	size_t mec_hpd_size;
   1941
   1942	const struct gfx_firmware_header_v1_0 *mec_hdr;
   1943
   1944	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
   1945
   1946	/* take ownership of the relevant compute queues */
   1947	amdgpu_gfx_compute_queue_acquire(adev);
   1948	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
   1949	if (mec_hpd_size) {
   1950		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
   1951					      AMDGPU_GEM_DOMAIN_VRAM,
   1952					      &adev->gfx.mec.hpd_eop_obj,
   1953					      &adev->gfx.mec.hpd_eop_gpu_addr,
   1954					      (void **)&hpd);
   1955		if (r) {
    1956			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
   1957			gfx_v9_0_mec_fini(adev);
   1958			return r;
   1959		}
   1960
   1961		memset(hpd, 0, mec_hpd_size);
   1962
   1963		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
   1964		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
   1965	}
   1966
   1967	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   1968
   1969	fw_data = (const __le32 *)
   1970		(adev->gfx.mec_fw->data +
   1971		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
   1972	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
   1973
   1974	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
   1975				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
   1976				      &adev->gfx.mec.mec_fw_obj,
   1977				      &adev->gfx.mec.mec_fw_gpu_addr,
   1978				      (void **)&fw);
   1979	if (r) {
   1980		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
   1981		gfx_v9_0_mec_fini(adev);
   1982		return r;
   1983	}
   1984
   1985	memcpy(fw, fw_data, fw_size);
   1986
   1987	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
   1988	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
   1989
   1990	return 0;
   1991}
   1992
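        /*
         * Wave debug helpers: read per-wave SQ state through the indexed
         * SQ_IND_INDEX/SQ_IND_DATA register pair.
         */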
   1993static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
   1994{
   1995	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
   1996		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   1997		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
   1998		(address << SQ_IND_INDEX__INDEX__SHIFT) |
   1999		(SQ_IND_INDEX__FORCE_READ_MASK));
   2000	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
   2001}
   2002
   2003static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
   2004			   uint32_t wave, uint32_t thread,
   2005			   uint32_t regno, uint32_t num, uint32_t *out)
   2006{
   2007	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
   2008		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   2009		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
   2010		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
   2011		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
   2012		(SQ_IND_INDEX__FORCE_READ_MASK) |
   2013		(SQ_IND_INDEX__AUTO_INCR_MASK));
   2014	while (num--)
   2015		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
   2016}
   2017
   2018static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
   2019{
   2020	/* type 1 wave data */
   2021	dst[(*no_fields)++] = 1;
   2022	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
   2023	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
   2024	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
   2025	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
   2026	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
   2027	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
   2028	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
   2029	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
   2030	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
   2031	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
   2032	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
   2033	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
   2034	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
   2035	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
   2036	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
   2037}
   2038
   2039static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
   2040				     uint32_t wave, uint32_t start,
   2041				     uint32_t size, uint32_t *dst)
   2042{
   2043	wave_read_regs(
   2044		adev, simd, wave, 0,
   2045		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
   2046}
   2047
   2048static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
   2049				     uint32_t wave, uint32_t thread,
   2050				     uint32_t start, uint32_t size,
   2051				     uint32_t *dst)
   2052{
   2053	wave_read_regs(
   2054		adev, simd, wave, thread,
   2055		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
   2056}
   2057
   2058static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
   2059				  u32 me, u32 pipe, u32 q, u32 vm)
   2060{
   2061	soc15_grbm_select(adev, me, pipe, q, vm);
   2062}
   2063
   2064static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
    2065	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
    2066	.select_se_sh = &gfx_v9_0_select_se_sh,
    2067	.read_wave_data = &gfx_v9_0_read_wave_data,
    2068	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
    2069	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
    2070	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
   2071};
   2072
    2073const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
    2074	.ras_error_inject = &gfx_v9_0_ras_error_inject,
    2075	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
    2076	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
   2077};
   2078
   2079static struct amdgpu_gfx_ras gfx_v9_0_ras = {
   2080	.ras_block = {
   2081		.hw_ops = &gfx_v9_0_ras_ops,
   2082	},
   2083};
   2084
   2085static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
   2086{
   2087	u32 gb_addr_config;
   2088	int err;
   2089
   2090	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
   2091
   2092	switch (adev->ip_versions[GC_HWIP][0]) {
   2093	case IP_VERSION(9, 0, 1):
   2094		adev->gfx.config.max_hw_contexts = 8;
   2095		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2096		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2097		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   2098		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2099		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
   2100		break;
   2101	case IP_VERSION(9, 2, 1):
   2102		adev->gfx.config.max_hw_contexts = 8;
   2103		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2104		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2105		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   2106		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2107		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
   2108		DRM_INFO("fix gfx.config for vega12\n");
   2109		break;
   2110	case IP_VERSION(9, 4, 0):
   2111		adev->gfx.ras = &gfx_v9_0_ras;
   2112		adev->gfx.config.max_hw_contexts = 8;
   2113		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2114		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2115		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   2116		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2117		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
   2118		gb_addr_config &= ~0xf3e777ff;
   2119		gb_addr_config |= 0x22014042;
   2120		/* check vbios table if gpu info is not available */
   2121		err = amdgpu_atomfirmware_get_gfx_info(adev);
   2122		if (err)
   2123			return err;
   2124		break;
   2125	case IP_VERSION(9, 2, 2):
   2126	case IP_VERSION(9, 1, 0):
   2127		adev->gfx.config.max_hw_contexts = 8;
   2128		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2129		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2130		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   2131		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2132		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
   2133			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
   2134		else
   2135			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
   2136		break;
   2137	case IP_VERSION(9, 4, 1):
   2138		adev->gfx.ras = &gfx_v9_4_ras;
   2139		adev->gfx.config.max_hw_contexts = 8;
   2140		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2141		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2142		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   2143		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2144		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
   2145		gb_addr_config &= ~0xf3e777ff;
   2146		gb_addr_config |= 0x22014042;
   2147		break;
   2148	case IP_VERSION(9, 3, 0):
   2149		adev->gfx.config.max_hw_contexts = 8;
   2150		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2151		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2152		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
   2153		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2154		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
   2155		gb_addr_config &= ~0xf3e777ff;
   2156		gb_addr_config |= 0x22010042;
   2157		break;
   2158	case IP_VERSION(9, 4, 2):
   2159		adev->gfx.ras = &gfx_v9_4_2_ras;
   2160		adev->gfx.config.max_hw_contexts = 8;
   2161		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   2162		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   2163		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   2164		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
   2165		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
   2166		gb_addr_config &= ~0xf3e777ff;
   2167		gb_addr_config |= 0x22014042;
   2168		/* check vbios table if gpu info is not available */
   2169		err = amdgpu_atomfirmware_get_gfx_info(adev);
   2170		if (err)
   2171			return err;
   2172		break;
   2173	default:
   2174		BUG();
   2175		break;
   2176	}
   2177
   2178	if (adev->gfx.ras) {
   2179		err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
   2180		if (err) {
   2181			DRM_ERROR("Failed to register gfx ras block!\n");
   2182			return err;
   2183		}
   2184
   2185		strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
   2186		adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
   2187		adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
   2188		adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
   2189
    2190		/* If no special ras_late_init is defined, use the gfx default ras_late_init */
   2191		if (!adev->gfx.ras->ras_block.ras_late_init)
   2192			adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
   2193
    2194		/* If no special ras_cb is defined, use the default ras_cb */
   2195		if (!adev->gfx.ras->ras_block.ras_cb)
   2196			adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
   2197	}
   2198
   2199	adev->gfx.config.gb_addr_config = gb_addr_config;
   2200
   2201	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
   2202			REG_GET_FIELD(
   2203					adev->gfx.config.gb_addr_config,
   2204					GB_ADDR_CONFIG,
   2205					NUM_PIPES);
   2206
   2207	adev->gfx.config.max_tile_pipes =
   2208		adev->gfx.config.gb_addr_config_fields.num_pipes;
   2209
   2210	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
   2211			REG_GET_FIELD(
   2212					adev->gfx.config.gb_addr_config,
   2213					GB_ADDR_CONFIG,
   2214					NUM_BANKS);
   2215	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
   2216			REG_GET_FIELD(
   2217					adev->gfx.config.gb_addr_config,
   2218					GB_ADDR_CONFIG,
   2219					MAX_COMPRESSED_FRAGS);
   2220	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
   2221			REG_GET_FIELD(
   2222					adev->gfx.config.gb_addr_config,
   2223					GB_ADDR_CONFIG,
   2224					NUM_RB_PER_SE);
   2225	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
   2226			REG_GET_FIELD(
   2227					adev->gfx.config.gb_addr_config,
   2228					GB_ADDR_CONFIG,
   2229					NUM_SHADER_ENGINES);
   2230	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
   2231			REG_GET_FIELD(
   2232					adev->gfx.config.gb_addr_config,
   2233					GB_ADDR_CONFIG,
   2234					PIPE_INTERLEAVE_SIZE));
   2235
   2236	return 0;
   2237}
   2238
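        /*
         * Set up one compute ring: map it onto its MEC/pipe/queue, carve out
         * its doorbell and EOP buffer slice, and hook up the EOP interrupt.
         */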
   2239static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
   2240				      int mec, int pipe, int queue)
   2241{
   2242	unsigned irq_type;
    2243	struct amdgpu_ring *ring;
   2244	unsigned int hw_prio;
   2245
   2246	ring = &adev->gfx.compute_ring[ring_id];
   2247
   2248	/* mec0 is me1 */
   2249	ring->me = mec + 1;
   2250	ring->pipe = pipe;
   2251	ring->queue = queue;
   2252
   2253	ring->ring_obj = NULL;
   2254	ring->use_doorbell = true;
   2255	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
   2256	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
   2257				+ (ring_id * GFX9_MEC_HPD_SIZE);
   2258	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
   2259
   2260	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
   2261		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
   2262		+ ring->pipe;
   2263	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
   2264			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
   2265	/* type-2 packets are deprecated on MEC, use type-3 instead */
   2266	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
   2267				hw_prio, NULL);
   2268}
   2269
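        /*
         * IP-level sw_init: set the MEC topology, register the CP interrupt
         * sources, load microcode and create the gfx, compute and KIQ rings
         * along with their MQD backing store.
         */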
   2270static int gfx_v9_0_sw_init(void *handle)
   2271{
   2272	int i, j, k, r, ring_id;
   2273	struct amdgpu_ring *ring;
   2274	struct amdgpu_kiq *kiq;
   2275	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   2276
   2277	switch (adev->ip_versions[GC_HWIP][0]) {
   2278	case IP_VERSION(9, 0, 1):
   2279	case IP_VERSION(9, 2, 1):
   2280	case IP_VERSION(9, 4, 0):
   2281	case IP_VERSION(9, 2, 2):
   2282	case IP_VERSION(9, 1, 0):
   2283	case IP_VERSION(9, 4, 1):
   2284	case IP_VERSION(9, 3, 0):
   2285	case IP_VERSION(9, 4, 2):
   2286		adev->gfx.mec.num_mec = 2;
   2287		break;
   2288	default:
   2289		adev->gfx.mec.num_mec = 1;
   2290		break;
   2291	}
   2292
   2293	adev->gfx.mec.num_pipe_per_mec = 4;
   2294	adev->gfx.mec.num_queue_per_pipe = 8;
   2295
   2296	/* EOP Event */
   2297	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
   2298	if (r)
   2299		return r;
   2300
   2301	/* Privileged reg */
   2302	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
   2303			      &adev->gfx.priv_reg_irq);
   2304	if (r)
   2305		return r;
   2306
   2307	/* Privileged inst */
   2308	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
   2309			      &adev->gfx.priv_inst_irq);
   2310	if (r)
   2311		return r;
   2312
   2313	/* ECC error */
   2314	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
   2315			      &adev->gfx.cp_ecc_error_irq);
   2316	if (r)
   2317		return r;
   2318
   2319	/* FUE error */
   2320	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
   2321			      &adev->gfx.cp_ecc_error_irq);
   2322	if (r)
   2323		return r;
   2324
   2325	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
   2326
   2327	r = gfx_v9_0_init_microcode(adev);
   2328	if (r) {
   2329		DRM_ERROR("Failed to load gfx firmware!\n");
   2330		return r;
   2331	}
   2332
   2333	if (adev->gfx.rlc.funcs) {
   2334		if (adev->gfx.rlc.funcs->init) {
   2335			r = adev->gfx.rlc.funcs->init(adev);
   2336			if (r) {
   2337				dev_err(adev->dev, "Failed to init rlc BOs!\n");
   2338				return r;
   2339			}
   2340		}
   2341	}
   2342
   2343	r = gfx_v9_0_mec_init(adev);
   2344	if (r) {
   2345		DRM_ERROR("Failed to init MEC BOs!\n");
   2346		return r;
   2347	}
   2348
   2349	/* set up the gfx ring */
   2350	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   2351		ring = &adev->gfx.gfx_ring[i];
   2352		ring->ring_obj = NULL;
   2353		if (!i)
   2354			sprintf(ring->name, "gfx");
   2355		else
   2356			sprintf(ring->name, "gfx_%d", i);
   2357		ring->use_doorbell = true;
   2358		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
   2359		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
   2360				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
   2361				     AMDGPU_RING_PRIO_DEFAULT, NULL);
   2362		if (r)
   2363			return r;
   2364	}
   2365
   2366	/* set up the compute queues - allocate horizontally across pipes */
   2367	ring_id = 0;
   2368	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
   2369		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
   2370			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
   2371				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
   2372					continue;
   2373
   2374				r = gfx_v9_0_compute_ring_init(adev,
   2375							       ring_id,
   2376							       i, k, j);
   2377				if (r)
   2378					return r;
   2379
   2380				ring_id++;
   2381			}
   2382		}
   2383	}
   2384
   2385	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
   2386	if (r) {
   2387		DRM_ERROR("Failed to init KIQ BOs!\n");
   2388		return r;
   2389	}
   2390
   2391	kiq = &adev->gfx.kiq;
   2392	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
   2393	if (r)
   2394		return r;
   2395
    2396	/* create MQD for all compute queues as well as KIQ for SRIOV case */
   2397	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
   2398	if (r)
   2399		return r;
   2400
   2401	adev->gfx.ce_ram_size = 0x8000;
   2402
   2403	r = gfx_v9_0_gpu_early_init(adev);
   2404	if (r)
   2405		return r;
   2406
   2407	return 0;
   2408}
    2409
   2411static int gfx_v9_0_sw_fini(void *handle)
   2412{
   2413	int i;
   2414	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   2415
   2416	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   2417		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
   2418	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   2419		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
   2420
   2421	amdgpu_gfx_mqd_sw_fini(adev);
   2422	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
   2423	amdgpu_gfx_kiq_fini(adev);
   2424
   2425	gfx_v9_0_mec_fini(adev);
   2426	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
   2427				&adev->gfx.rlc.clear_state_gpu_addr,
   2428				(void **)&adev->gfx.rlc.cs_ptr);
   2429	if (adev->flags & AMD_IS_APU) {
   2430		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
   2431				&adev->gfx.rlc.cp_table_gpu_addr,
   2432				(void **)&adev->gfx.rlc.cp_table_ptr);
   2433	}
   2434	gfx_v9_0_free_microcode(adev);
   2435
   2436	return 0;
   2437}
    2438
   2440static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
   2441{
   2442	/* TODO */
   2443}
   2444
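        /*
         * Steer subsequent register accesses at a single SE/SH/instance, or
         * broadcast where 0xffffffff is passed; callers hold grbm_idx_mutex.
         */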
   2445void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
   2446			   u32 instance)
   2447{
   2448	u32 data;
   2449
   2450	if (instance == 0xffffffff)
   2451		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
   2452	else
   2453		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
   2454
   2455	if (se_num == 0xffffffff)
   2456		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
   2457	else
   2458		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
   2459
   2460	if (sh_num == 0xffffffff)
   2461		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
   2462	else
   2463		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
   2464
   2465	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
   2466}
   2467
   2468static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
   2469{
   2470	u32 data, mask;
   2471
   2472	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
   2473	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
   2474
   2475	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
   2476	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
   2477
   2478	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
   2479					 adev->gfx.config.max_sh_per_se);
   2480
   2481	return (~data) & mask;
   2482}
   2483
   2484static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
   2485{
   2486	int i, j;
   2487	u32 data;
   2488	u32 active_rbs = 0;
   2489	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
   2490					adev->gfx.config.max_sh_per_se;
   2491
   2492	mutex_lock(&adev->grbm_idx_mutex);
   2493	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   2494		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   2495			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
   2496			data = gfx_v9_0_get_rb_active_bitmap(adev);
   2497			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
   2498					       rb_bitmap_width_per_sh);
   2499		}
   2500	}
   2501	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   2502	mutex_unlock(&adev->grbm_idx_mutex);
   2503
   2504	adev->gfx.config.backend_enable_mask = active_rbs;
   2505	adev->gfx.config.num_rbs = hweight32(active_rbs);
   2506}
   2507
   2508#define DEFAULT_SH_MEM_BASES	(0x6000)
   2509static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
   2510{
   2511	int i;
   2512	uint32_t sh_mem_config;
   2513	uint32_t sh_mem_bases;
   2514
   2515	/*
   2516	 * Configure apertures:
   2517	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
   2518	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
   2519	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
   2520	 */
   2521	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
   2522
   2523	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
   2524			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
   2525			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
   2526
   2527	mutex_lock(&adev->srbm_mutex);
   2528	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   2529		soc15_grbm_select(adev, 0, 0, 0, i);
   2530		/* CP and shaders */
   2531		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
   2532		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
   2533	}
   2534	soc15_grbm_select(adev, 0, 0, 0, 0);
   2535	mutex_unlock(&adev->srbm_mutex);
   2536
   2537	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
   2538	   access. These should be enabled by FW for target VMIDs. */
   2539	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   2540		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
   2541		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
   2542		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
   2543		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
   2544	}
   2545}
   2546
   2547static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
   2548{
   2549	int vmid;
   2550
   2551	/*
   2552	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
   2553	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
   2554	 * the driver can enable them for graphics. VMID0 should maintain
   2555	 * access so that HWS firmware can save/restore entries.
   2556	 */
   2557	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
   2558		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
   2559		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
   2560		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
   2561		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
   2562	}
   2563}
   2564
   2565static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
   2566{
   2567	uint32_t tmp;
   2568
   2569	switch (adev->ip_versions[GC_HWIP][0]) {
   2570	case IP_VERSION(9, 4, 1):
   2571		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
   2572		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
   2573					DISABLE_BARRIER_WAITCNT, 1);
   2574		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
   2575		break;
   2576	default:
   2577		break;
   2578	}
   2579}
   2580
   2581static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
   2582{
   2583	u32 tmp;
   2584	int i;
   2585
   2586	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
   2587
   2588	gfx_v9_0_tiling_mode_table_init(adev);
   2589
   2590	gfx_v9_0_setup_rb(adev);
   2591	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
   2592	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
   2593
   2594	/* XXX SH_MEM regs */
   2595	/* where to put LDS, scratch, GPUVM in FSA64 space */
   2596	mutex_lock(&adev->srbm_mutex);
   2597	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
   2598		soc15_grbm_select(adev, 0, 0, 0, i);
   2599		/* CP and shaders */
   2600		if (i == 0) {
   2601			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
   2602					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
   2603			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
   2604					    !!adev->gmc.noretry);
   2605			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
   2606			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
   2607		} else {
   2608			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
   2609					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
   2610			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
   2611					    !!adev->gmc.noretry);
   2612			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
   2613			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
   2614				(adev->gmc.private_aperture_start >> 48));
   2615			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
   2616				(adev->gmc.shared_aperture_start >> 48));
   2617			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
   2618		}
   2619	}
   2620	soc15_grbm_select(adev, 0, 0, 0, 0);
   2621
   2622	mutex_unlock(&adev->srbm_mutex);
   2623
   2624	gfx_v9_0_init_compute_vmid(adev);
   2625	gfx_v9_0_init_gds_vmid(adev);
   2626	gfx_v9_0_init_sq_config(adev);
   2627}
   2628
   2629static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
   2630{
   2631	u32 i, j, k;
   2632	u32 mask;
   2633
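       	/* Note (added): poll each SE/SH until the per-CU serdes masters go
       	 * idle, then poll the non-CU masters (SE/GC/TC0/TC1) globally below.
       	 */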
   2634	mutex_lock(&adev->grbm_idx_mutex);
   2635	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   2636		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   2637			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
   2638			for (k = 0; k < adev->usec_timeout; k++) {
   2639				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
   2640					break;
   2641				udelay(1);
   2642			}
   2643			if (k == adev->usec_timeout) {
   2644				gfx_v9_0_select_se_sh(adev, 0xffffffff,
   2645						      0xffffffff, 0xffffffff);
   2646				mutex_unlock(&adev->grbm_idx_mutex);
   2647				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
   2648					 i, j);
   2649				return;
   2650			}
   2651		}
   2652	}
   2653	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   2654	mutex_unlock(&adev->grbm_idx_mutex);
   2655
   2656	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
   2657		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
   2658		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
   2659		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
   2660	for (k = 0; k < adev->usec_timeout; k++) {
   2661		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
   2662			break;
   2663		udelay(1);
   2664	}
   2665}
   2666
   2667static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
   2668					       bool enable)
   2669{
   2670	u32 tmp;
   2671
   2672	/* These interrupts should be enabled to drive DS clock */
   2673
   2674	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
   2675
   2676	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
   2677	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
   2678	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
   2679	if (adev->gfx.num_gfx_rings)
   2680		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
   2681
   2682	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
   2683}
   2684
   2685static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
   2686{
   2687	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
   2688	/* CSIB: point the RLC at the clear-state indirect buffer */
   2689	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
   2690			adev->gfx.rlc.clear_state_gpu_addr >> 32);
   2691	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
   2692			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
   2693	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
   2694			adev->gfx.rlc.clear_state_size);
   2695}
   2696
   2697static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
   2698				int indirect_offset,
   2699				int list_size,
   2700				int *unique_indirect_regs,
   2701				int unique_indirect_reg_count,
   2702				int *indirect_start_offsets,
   2703				int *indirect_start_offsets_count,
   2704				int max_start_offsets_count)
   2705{
   2706	int idx;
   2707
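       	/* Note (added): each indirect block appears to be a run of
       	 * (register, value) dword pairs terminated by a 0xFFFFFFFF sentinel;
       	 * record where every block starts and collect the unique register
       	 * offsets.
       	 */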
   2708	for (; indirect_offset < list_size; indirect_offset++) {
   2709		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
   2710		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
   2711		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
   2712
   2713		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
   2714			indirect_offset += 2;
   2715
   2716			/* look for the matching index */
   2717			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
   2718				if (unique_indirect_regs[idx] ==
   2719					register_list_format[indirect_offset] ||
   2720					!unique_indirect_regs[idx])
   2721					break;
   2722			}
   2723
   2724			BUG_ON(idx >= unique_indirect_reg_count);
   2725
   2726			if (!unique_indirect_regs[idx])
   2727				unique_indirect_regs[idx] = register_list_format[indirect_offset];
   2728
   2729			indirect_offset++;
   2730		}
   2731	}
   2732}
   2733
   2734static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
   2735{
   2736	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
   2737	int unique_indirect_reg_count = 0;
   2738
   2739	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
   2740	int indirect_start_offsets_count = 0;
   2741
   2742	int list_size = 0;
   2743	int i = 0, j = 0;
   2744	u32 tmp = 0;
   2745
   2746	u32 *register_list_format =
   2747		kmemdup(adev->gfx.rlc.register_list_format,
   2748			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
   2749	if (!register_list_format)
   2750		return -ENOMEM;
   2751
   2752	/* setup unique_indirect_regs array and indirect_start_offsets array */
   2753	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
   2754	gfx_v9_1_parse_ind_reg_list(register_list_format,
   2755				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
   2756				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
   2757				    unique_indirect_regs,
   2758				    unique_indirect_reg_count,
   2759				    indirect_start_offsets,
   2760				    &indirect_start_offsets_count,
   2761				    ARRAY_SIZE(indirect_start_offsets));
   2762
   2763	/* enable auto inc in case it is disabled */
   2764	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
   2765	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
   2766	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
   2767
   2768	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
   2769	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
   2770		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
   2771	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
   2772		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
   2773			adev->gfx.rlc.register_restore[i]);
   2774
   2775	/* load indirect register */
   2776	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
   2777		adev->gfx.rlc.reg_list_format_start);
   2778
   2779	/* direct register portion */
   2780	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
   2781		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
   2782			register_list_format[i]);
   2783
   2784	/* indirect register portion */
   2785	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
   2786		if (register_list_format[i] == 0xFFFFFFFF) {
   2787			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
   2788			continue;
   2789		}
   2790
   2791		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
   2792		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
   2793
   2794		for (j = 0; j < unique_indirect_reg_count; j++) {
   2795			if (register_list_format[i] == unique_indirect_regs[j]) {
   2796				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
   2797				break;
   2798			}
   2799		}
   2800
   2801		BUG_ON(j >= unique_indirect_reg_count);
   2802
   2803		i++;
   2804	}
   2805
   2806	/* set save/restore list size */
   2807	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
   2808	list_size = list_size >> 1;
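       	/* Note (added): halved because the restore list is (register, value)
       	 * pairs, so the entry count is half the dword count.
       	 */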
   2809	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
   2810		adev->gfx.rlc.reg_restore_list_size);
   2811	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
   2812
   2813	/* write the starting offsets to RLC scratch ram */
   2814	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
   2815		adev->gfx.rlc.starting_offsets_start);
   2816	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
   2817		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
   2818		       indirect_start_offsets[i]);
   2819
   2820	/* load unique indirect regs */
   2821	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
   2822		if (unique_indirect_regs[i] != 0) {
   2823			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
   2824			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
   2825			       unique_indirect_regs[i] & 0x3FFFF);
   2826
   2827			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
   2828			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
   2829			       unique_indirect_regs[i] >> 20);
   2830		}
   2831	}
   2832
   2833	kfree(register_list_format);
   2834	return 0;
   2835}
   2836
   2837static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
   2838{
   2839	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
   2840}
   2841
   2842static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
   2843					     bool enable)
   2844{
   2845	uint32_t data = 0;
   2846	uint32_t default_data = 0;
   2847
   2848	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
   2849	if (enable) {
   2850		/* enable GFXIP control over CGPG */
   2851		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
   2852		if (default_data != data)
   2853			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
   2854
   2855		/* update status */
   2856		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
   2857		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
   2858		if (default_data != data)
   2859			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
   2860	} else {
   2861		/* restore GFXIP control over CGPG */
   2862		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
   2863		if (default_data != data)
   2864			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
   2865	}
   2866}
   2867
   2868static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
   2869{
   2870	uint32_t data = 0;
   2871
   2872	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
   2873			      AMD_PG_SUPPORT_GFX_SMG |
   2874			      AMD_PG_SUPPORT_GFX_DMG)) {
   2875		/* init IDLE_POLL_COUNT = 60 */
   2876		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
   2877		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
   2878		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
   2879		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
   2880
   2881		/* init RLC PG Delay */
   2882		data = 0;
   2883		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
   2884		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
   2885		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
   2886		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
   2887		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
   2888
   2889		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
   2890		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
   2891		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
   2892		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
   2893
   2894		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
   2895		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
   2896		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
   2897		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
   2898
   2899		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
   2900		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
   2901
   2902		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
   2903		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
   2904		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
   2905		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
   2906			pwr_10_0_gfxip_control_over_cgpg(adev, true);
   2907	}
   2908}
   2909
   2910static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
   2911						bool enable)
   2912{
   2913	uint32_t data = 0;
   2914	uint32_t default_data = 0;
   2915
   2916	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   2917	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   2918			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
   2919			     enable ? 1 : 0);
   2920	if (default_data != data)
   2921		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   2922}
   2923
   2924static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
   2925						bool enable)
   2926{
   2927	uint32_t data = 0;
   2928	uint32_t default_data = 0;
   2929
   2930	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   2931	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   2932			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
   2933			     enable ? 1 : 0);
   2934	if (default_data != data)
   2935		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   2936}
   2937
   2938static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
   2939					bool enable)
   2940{
   2941	uint32_t data = 0;
   2942	uint32_t default_data = 0;
   2943
   2944	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   2945	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   2946			     CP_PG_DISABLE,
   2947			     enable ? 0 : 1);
   2948	if (default_data != data)
   2949		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   2950}
   2951
   2952static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
   2953						bool enable)
   2954{
   2955	uint32_t data, default_data;
   2956
   2957	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   2958	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   2959			     GFX_POWER_GATING_ENABLE,
   2960			     enable ? 1 : 0);
   2961	if (default_data != data)
   2962		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   2963}
   2964
   2965static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
   2966						bool enable)
   2967{
   2968	uint32_t data, default_data;
   2969
   2970	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   2971	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   2972			     GFX_PIPELINE_PG_ENABLE,
   2973			     enable ? 1 : 0);
   2974	if (default_data != data)
   2975		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   2976
   2977	if (!enable)
   2978		/* read any GFX register to wake up GFX */
   2979		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
   2980}
   2981
   2982static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
   2983						       bool enable)
   2984{
   2985	uint32_t data, default_data;
   2986
   2987	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   2988	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   2989			     STATIC_PER_CU_PG_ENABLE,
   2990			     enable ? 1 : 0);
   2991	if (default_data != data)
   2992		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   2993}
   2994
   2995static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
   2996						bool enable)
   2997{
   2998	uint32_t data, default_data;
   2999
   3000	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
   3001	data = REG_SET_FIELD(data, RLC_PG_CNTL,
   3002			     DYN_PER_CU_PG_ENABLE,
   3003			     enable ? 1 : 0);
   3004	if (default_data != data)
   3005		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
   3006}
   3007
   3008static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
   3009{
   3010	gfx_v9_0_init_csb(adev);
   3011
   3012	/*
   3013	 * Rlc save restore list is workable since v2_1.
   3014	 * And it's needed by gfxoff feature.
   3015	 */
   3016	if (adev->gfx.rlc.is_rlc_v2_1) {
   3017		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
   3018		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
   3019			gfx_v9_1_init_rlc_save_restore_list(adev);
   3020		gfx_v9_0_enable_save_restore_machine(adev);
   3021	}
   3022
   3023	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
   3024			      AMD_PG_SUPPORT_GFX_SMG |
   3025			      AMD_PG_SUPPORT_GFX_DMG |
   3026			      AMD_PG_SUPPORT_CP |
   3027			      AMD_PG_SUPPORT_GDS |
   3028			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
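       		/* Note (added): the jump table address is programmed in
       		 * 256-byte units, hence the >> 8 below.
       		 */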
   3029		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
   3030			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
   3031		gfx_v9_0_init_gfx_power_gating(adev);
   3032	}
   3033}
   3034
   3035static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
   3036{
   3037	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
   3038	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
   3039	gfx_v9_0_wait_for_rlc_serdes(adev);
   3040}
   3041
   3042static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
   3043{
   3044	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
   3045	udelay(50);
   3046	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
   3047	udelay(50);
   3048}
   3049
   3050static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
   3051{
   3052#ifdef AMDGPU_RLC_DEBUG_RETRY
   3053	u32 rlc_ucode_ver;
   3054#endif
   3055
   3056	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
   3057	udelay(50);
   3058
   3059	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
   3060	if (!(adev->flags & AMD_IS_APU)) {
   3061		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
   3062		udelay(50);
   3063	}
   3064
   3065#ifdef AMDGPU_RLC_DEBUG_RETRY
   3066	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
   3067	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
   3068	if (rlc_ucode_ver == 0x108) {
   3069		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
   3070				rlc_ucode_ver, adev->gfx.rlc_fw_version);
   3071		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
   3072		 * default is 0x9C4 to create a 100us interval */
   3073		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
   3074		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
   3075		 * to disable the page fault retry interrupts, default is
   3076		 * 0x100 (256) */
   3077		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
   3078	}
   3079#endif
   3080}
   3081
   3082static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
   3083{
   3084	const struct rlc_firmware_header_v2_0 *hdr;
   3085	const __le32 *fw_data;
   3086	unsigned i, fw_size;
   3087
   3088	if (!adev->gfx.rlc_fw)
   3089		return -EINVAL;
   3090
   3091	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
   3092	amdgpu_ucode_print_rlc_hdr(&hdr->header);
   3093
   3094	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
   3095			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
   3096	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
   3097
   3098	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
   3099			RLCG_UCODE_LOADING_START_ADDRESS);
   3100	for (i = 0; i < fw_size; i++)
   3101		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
   3102	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
   3103
   3104	return 0;
   3105}
   3106
   3107static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
   3108{
   3109	int r;
   3110
   3111	if (amdgpu_sriov_vf(adev)) {
   3112		gfx_v9_0_init_csb(adev);
   3113		return 0;
   3114	}
   3115
   3116	adev->gfx.rlc.funcs->stop(adev);
   3117
   3118	/* disable CG */
   3119	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
   3120
   3121	gfx_v9_0_init_pg(adev);
   3122
   3123	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
   3124		/* legacy rlc firmware loading */
   3125		r = gfx_v9_0_rlc_load_microcode(adev);
   3126		if (r)
   3127			return r;
   3128	}
   3129
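       	/* Note (added): the amdgpu_lbpw module parameter (load balancing
       	 * per watt) is 1 = enable, 0 = disable, -1 = auto; auto maps to
       	 * enabled on GC 9.2.2/9.1.0 but to disabled on GC 9.4.0 below.
       	 */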
   3130	switch (adev->ip_versions[GC_HWIP][0]) {
   3131	case IP_VERSION(9, 2, 2):
   3132	case IP_VERSION(9, 1, 0):
   3133		if (amdgpu_lbpw == 0)
   3134			gfx_v9_0_enable_lbpw(adev, false);
   3135		else
   3136			gfx_v9_0_enable_lbpw(adev, true);
   3137		break;
   3138	case IP_VERSION(9, 4, 0):
   3139		if (amdgpu_lbpw > 0)
   3140			gfx_v9_0_enable_lbpw(adev, true);
   3141		else
   3142			gfx_v9_0_enable_lbpw(adev, false);
   3143		break;
   3144	default:
   3145		break;
   3146	}
   3147
   3148	adev->gfx.rlc.funcs->start(adev);
   3149
   3150	return 0;
   3151}
   3152
   3153static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
   3154{
   3155	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
   3156
   3157	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
   3158	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
   3159	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
   3160	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
   3161	udelay(50);
   3162}
   3163
   3164static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
   3165{
   3166	const struct gfx_firmware_header_v1_0 *pfp_hdr;
   3167	const struct gfx_firmware_header_v1_0 *ce_hdr;
   3168	const struct gfx_firmware_header_v1_0 *me_hdr;
   3169	const __le32 *fw_data;
   3170	unsigned i, fw_size;
   3171
   3172	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
   3173		return -EINVAL;
   3174
   3175	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
   3176		adev->gfx.pfp_fw->data;
   3177	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
   3178		adev->gfx.ce_fw->data;
   3179	me_hdr = (const struct gfx_firmware_header_v1_0 *)
   3180		adev->gfx.me_fw->data;
   3181
   3182	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
   3183	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
   3184	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
   3185
   3186	gfx_v9_0_cp_gfx_enable(adev, false);
   3187
   3188	/* PFP */
   3189	fw_data = (const __le32 *)
   3190		(adev->gfx.pfp_fw->data +
   3191		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
   3192	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
   3193	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
   3194	for (i = 0; i < fw_size; i++)
   3195		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
   3196	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
   3197
   3198	/* CE */
   3199	fw_data = (const __le32 *)
   3200		(adev->gfx.ce_fw->data +
   3201		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
   3202	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
   3203	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
   3204	for (i = 0; i < fw_size; i++)
   3205		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
   3206	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
   3207
   3208	/* ME */
   3209	fw_data = (const __le32 *)
   3210		(adev->gfx.me_fw->data +
   3211		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
   3212	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
   3213	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
   3214	for (i = 0; i < fw_size; i++)
   3215		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
   3216	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
   3217
   3218	return 0;
   3219}
   3220
   3221static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
   3222{
   3223	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
   3224	const struct cs_section_def *sect = NULL;
   3225	const struct cs_extent_def *ext = NULL;
   3226	int r, i, tmp;
   3227
   3228	/* init the CP */
   3229	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
   3230	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
   3231
   3232	gfx_v9_0_cp_gfx_enable(adev, true);
   3233
   3234	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
   3235	if (r) {
   3236		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
   3237		return r;
   3238	}
   3239
   3240	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   3241	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   3242
   3243	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   3244	amdgpu_ring_write(ring, 0x80000000);
   3245	amdgpu_ring_write(ring, 0x80000000);
   3246
   3247	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
   3248		for (ext = sect->section; ext->extent != NULL; ++ext) {
   3249			if (sect->id == SECT_CONTEXT) {
   3250				amdgpu_ring_write(ring,
   3251				       PACKET3(PACKET3_SET_CONTEXT_REG,
   3252					       ext->reg_count));
   3253				amdgpu_ring_write(ring,
   3254				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
   3255				for (i = 0; i < ext->reg_count; i++)
   3256					amdgpu_ring_write(ring, ext->extent[i]);
   3257			}
   3258		}
   3259	}
   3260
   3261	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   3262	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
   3263
   3264	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
   3265	amdgpu_ring_write(ring, 0);
   3266
   3267	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
   3268	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
   3269	amdgpu_ring_write(ring, 0x8000);
   3270	amdgpu_ring_write(ring, 0x8000);
   3271
   3272	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
   3273	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
   3274		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
   3275	amdgpu_ring_write(ring, tmp);
   3276	amdgpu_ring_write(ring, 0);
   3277
   3278	amdgpu_ring_commit(ring);
   3279
   3280	return 0;
   3281}
   3282
   3283static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
   3284{
   3285	struct amdgpu_ring *ring;
   3286	u32 tmp;
   3287	u32 rb_bufsz;
   3288	u64 rb_addr, rptr_addr, wptr_gpu_addr;
   3289
   3290	/* Set the write pointer delay */
   3291	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
   3292
   3293	/* set the RB to use vmid 0 */
   3294	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
   3295
   3296	/* Set ring buffer size */
   3297	ring = &adev->gfx.gfx_ring[0];
   3298	rb_bufsz = order_base_2(ring->ring_size / 8);
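       	/* Note (added): RB_BUFSZ is log2-encoded, with the ring holding
       	 * 2^(RB_BUFSZ + 1) dwords; that is why the byte size is divided
       	 * by 8 before taking the log2.
       	 */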
   3299	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
   3300	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
   3301#ifdef __BIG_ENDIAN
   3302	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
   3303#endif
   3304	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
   3305
   3306	/* Initialize the ring buffer's write pointers */
   3307	ring->wptr = 0;
   3308	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
   3309	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
   3310
   3311	/* set the wb address whether it's enabled or not */
   3312	rptr_addr = ring->rptr_gpu_addr;
   3313	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
   3314	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
   3315
   3316	wptr_gpu_addr = ring->wptr_gpu_addr;
   3317	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
   3318	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
   3319
   3320	mdelay(1);
   3321	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
   3322
   3323	rb_addr = ring->gpu_addr >> 8;
   3324	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
   3325	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
   3326
   3327	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
   3328	if (ring->use_doorbell) {
   3329		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3330				    DOORBELL_OFFSET, ring->doorbell_index);
   3331		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   3332				    DOORBELL_EN, 1);
   3333	} else {
   3334		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
   3335	}
   3336	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
   3337
   3338	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
   3339			DOORBELL_RANGE_LOWER, ring->doorbell_index);
   3340	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
   3341
   3342	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
   3343		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
   3344
   3345
   3346	/* start the ring */
   3347	gfx_v9_0_cp_gfx_start(adev);
   3348	ring->sched.ready = true;
   3349
   3350	return 0;
   3351}
   3352
   3353static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
   3354{
   3355	if (enable) {
   3356		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
   3357	} else {
   3358		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
   3359			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
   3360		adev->gfx.kiq.ring.sched.ready = false;
   3361	}
   3362	udelay(50);
   3363}
   3364
   3365static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
   3366{
   3367	const struct gfx_firmware_header_v1_0 *mec_hdr;
   3368	const __le32 *fw_data;
   3369	unsigned i;
   3370	u32 tmp;
   3371
   3372	if (!adev->gfx.mec_fw)
   3373		return -EINVAL;
   3374
   3375	gfx_v9_0_cp_compute_enable(adev, false);
   3376
   3377	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   3378	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
   3379
   3380	fw_data = (const __le32 *)
   3381		(adev->gfx.mec_fw->data +
   3382		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
   3383	tmp = 0;
   3384	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
   3385	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
   3386	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
   3387
   3388	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
   3389		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
   3390	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
   3391		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
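       	/* Note (added): the CPC instruction cache fetches the main ucode
       	 * directly from mec_fw_gpu_addr; only the jump table entries are
       	 * written through the UCODE_ADDR/DATA port below.
       	 */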
   3392
   3393	/* MEC1 */
   3394	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
   3395			 mec_hdr->jt_offset);
   3396	for (i = 0; i < mec_hdr->jt_size; i++)
   3397		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
   3398			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
   3399
   3400	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
   3401			adev->gfx.mec_fw_version);
   3402	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
   3403
   3404	return 0;
   3405}
   3406
   3407/* KIQ functions */
   3408static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
   3409{
   3410	uint32_t tmp;
   3411	struct amdgpu_device *adev = ring->adev;
   3412
   3413	/* tell the RLC which queue is the KIQ */
   3414	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
   3415	tmp &= 0xffffff00;
   3416	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
   3417	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
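       	/* Note (added): bit 7 arms the scheduler entry; it is set in a
       	 * second write, presumably so the queue id is latched before the
       	 * enable bit.
       	 */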
   3418	tmp |= 0x80;
   3419	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
   3420}
   3421
   3422static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
   3423{
   3424	struct amdgpu_device *adev = ring->adev;
   3425
   3426	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
   3427		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
   3428			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
   3429			mqd->cp_hqd_queue_priority =
   3430				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
   3431		}
   3432	}
   3433}
   3434
   3435static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
   3436{
   3437	struct amdgpu_device *adev = ring->adev;
   3438	struct v9_mqd *mqd = ring->mqd_ptr;
   3439	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
   3440	uint32_t tmp;
   3441
   3442	mqd->header = 0xC0310800;
   3443	mqd->compute_pipelinestat_enable = 0x00000001;
   3444	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
   3445	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
   3446	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
   3447	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
   3448	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
   3449	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
   3450	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
   3451	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
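       	/* Note (added): all-ones static thread management masks expose
       	 * every CU in each SE to this queue.
       	 */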
   3452	mqd->compute_misc_reserved = 0x00000003;
   3453
   3454	mqd->dynamic_cu_mask_addr_lo =
   3455		lower_32_bits(ring->mqd_gpu_addr
   3456			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
   3457	mqd->dynamic_cu_mask_addr_hi =
   3458		upper_32_bits(ring->mqd_gpu_addr
   3459			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
   3460
   3461	eop_base_addr = ring->eop_gpu_addr >> 8;
   3462	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
   3463	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
   3464
   3465	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   3466	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
   3467	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
   3468			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
   3469
   3470	mqd->cp_hqd_eop_control = tmp;
   3471
   3472	/* enable doorbell? */
   3473	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
   3474
   3475	if (ring->use_doorbell) {
   3476		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   3477				    DOORBELL_OFFSET, ring->doorbell_index);
   3478		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   3479				    DOORBELL_EN, 1);
   3480		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   3481				    DOORBELL_SOURCE, 0);
   3482		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   3483				    DOORBELL_HIT, 0);
   3484	} else {
   3485		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   3486					 DOORBELL_EN, 0);
   3487	}
   3488
   3489	mqd->cp_hqd_pq_doorbell_control = tmp;
   3490
   3491	/* disable the queue if it's active */
   3492	ring->wptr = 0;
   3493	mqd->cp_hqd_dequeue_request = 0;
   3494	mqd->cp_hqd_pq_rptr = 0;
   3495	mqd->cp_hqd_pq_wptr_lo = 0;
   3496	mqd->cp_hqd_pq_wptr_hi = 0;
   3497
   3498	/* set the pointer to the MQD */
   3499	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
   3500	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
   3501
   3502	/* set MQD vmid to 0 */
   3503	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
   3504	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
   3505	mqd->cp_mqd_control = tmp;
   3506
   3507	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
   3508	hqd_gpu_addr = ring->gpu_addr >> 8;
   3509	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
   3510	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
   3511
   3512	/* set up the HQD, this is similar to CP_RB0_CNTL */
   3513	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
   3514	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
   3515			    (order_base_2(ring->ring_size / 4) - 1));
   3516	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
   3517			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
   3518#ifdef __BIG_ENDIAN
   3519	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
   3520#endif
   3521	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
   3522	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
   3523	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
   3524	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
   3525	mqd->cp_hqd_pq_control = tmp;
   3526
   3527	/* set the wb address whether it's enabled or not */
   3528	wb_gpu_addr = ring->rptr_gpu_addr;
   3529	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
   3530	mqd->cp_hqd_pq_rptr_report_addr_hi =
   3531		upper_32_bits(wb_gpu_addr) & 0xffff;
   3532
   3533	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
   3534	wb_gpu_addr = ring->wptr_gpu_addr;
   3535	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
   3536	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   3537
   3538	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   3539	ring->wptr = 0;
   3540	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
   3541
   3542	/* set the vmid for the queue */
   3543	mqd->cp_hqd_vmid = 0;
   3544
   3545	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
   3546	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
   3547	mqd->cp_hqd_persistent_state = tmp;
   3548
   3549	/* set MIN_IB_AVAIL_SIZE */
   3550	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
   3551	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
   3552	mqd->cp_hqd_ib_control = tmp;
   3553
   3554	/* set static priority for a queue/ring */
   3555	gfx_v9_0_mqd_set_priority(ring, mqd);
   3556	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
   3557
   3558	/* the map_queues packet doesn't need to activate the queue,
   3559	 * so only the KIQ needs to set this field.
   3560	 */
   3561	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
   3562		mqd->cp_hqd_active = 1;
   3563
   3564	return 0;
   3565}
   3566
   3567static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
   3568{
   3569	struct amdgpu_device *adev = ring->adev;
   3570	struct v9_mqd *mqd = ring->mqd_ptr;
   3571	int j;
   3572
   3573	/* disable wptr polling */
   3574	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
   3575
   3576	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
   3577	       mqd->cp_hqd_eop_base_addr_lo);
   3578	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
   3579	       mqd->cp_hqd_eop_base_addr_hi);
   3580
   3581	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   3582	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
   3583	       mqd->cp_hqd_eop_control);
   3584
   3585	/* enable doorbell? */
   3586	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
   3587	       mqd->cp_hqd_pq_doorbell_control);
   3588
   3589	/* disable the queue if it's active */
   3590	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
   3591		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
   3592		for (j = 0; j < adev->usec_timeout; j++) {
   3593			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
   3594				break;
   3595			udelay(1);
   3596		}
   3597		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
   3598		       mqd->cp_hqd_dequeue_request);
   3599		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
   3600		       mqd->cp_hqd_pq_rptr);
   3601		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
   3602		       mqd->cp_hqd_pq_wptr_lo);
   3603		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
   3604		       mqd->cp_hqd_pq_wptr_hi);
   3605	}
   3606
   3607	/* set the pointer to the MQD */
   3608	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
   3609	       mqd->cp_mqd_base_addr_lo);
   3610	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
   3611	       mqd->cp_mqd_base_addr_hi);
   3612
   3613	/* set MQD vmid to 0 */
   3614	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
   3615	       mqd->cp_mqd_control);
   3616
   3617	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
   3618	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
   3619	       mqd->cp_hqd_pq_base_lo);
   3620	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
   3621	       mqd->cp_hqd_pq_base_hi);
   3622
   3623	/* set up the HQD, this is similar to CP_RB0_CNTL */
   3624	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
   3625	       mqd->cp_hqd_pq_control);
   3626
   3627	/* set the wb address whether it's enabled or not */
   3628	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
   3629				mqd->cp_hqd_pq_rptr_report_addr_lo);
   3630	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
   3631				mqd->cp_hqd_pq_rptr_report_addr_hi);
   3632
   3633	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
   3634	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
   3635	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
   3636	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
   3637	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
   3638
   3639	/* enable the doorbell if requested */
   3640	if (ring->use_doorbell) {
   3641		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
   3642					(adev->doorbell_index.kiq * 2) << 2);
   3643		/* If GC has entered CGPG, ringing a doorbell beyond the first
   3644		 * page doesn't wake GC up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER
   3645		 * to work around this issue; this change has to align with the
   3646		 * corresponding firmware update.
   3647		 */
   3648		if (check_if_enlarge_doorbell_range(adev))
   3649			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
   3650					(adev->doorbell.size - 4));
   3651		else
   3652			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
   3653					(adev->doorbell_index.userqueue_end * 2) << 2);
   3654	}
   3655
   3656	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
   3657	       mqd->cp_hqd_pq_doorbell_control);
   3658
   3659	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   3660	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
   3661	       mqd->cp_hqd_pq_wptr_lo);
   3662	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
   3663	       mqd->cp_hqd_pq_wptr_hi);
   3664
   3665	/* set the vmid for the queue */
   3666	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
   3667
   3668	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
   3669	       mqd->cp_hqd_persistent_state);
   3670
   3671	/* activate the queue */
   3672	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
   3673	       mqd->cp_hqd_active);
   3674
   3675	if (ring->use_doorbell)
   3676		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
   3677
   3678	return 0;
   3679}
   3680
   3681static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
   3682{
   3683	struct amdgpu_device *adev = ring->adev;
   3684	int j;
   3685
   3686	/* disable the queue if it's active */
   3687	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
   3688
   3689		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
   3690
   3691		for (j = 0; j < adev->usec_timeout; j++) {
   3692			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
   3693				break;
   3694			udelay(1);
   3695		}
   3696
   3697		if (j == adev->usec_timeout) {
   3698			DRM_DEBUG("KIQ dequeue request failed.\n");
   3699
   3700			/* Manual disable if dequeue request times out */
   3701			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
   3702		}
   3703
   3704		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
   3705		      0);
   3706	}
   3707
   3708	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
   3709	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
   3710	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
   3711	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
   3712	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
   3713	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
   3714	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
   3715	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
   3716
   3717	return 0;
   3718}
   3719
   3720static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
   3721{
   3722	struct amdgpu_device *adev = ring->adev;
   3723	struct v9_mqd *mqd = ring->mqd_ptr;
   3724	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
   3725	struct v9_mqd *tmp_mqd;
   3726
   3727	gfx_v9_0_kiq_setting(ring);
   3728
   3729	/* The GPU could be in a bad state during probe; the driver triggers
   3730	 * the reset after loading the SMU, in which case the MQD has not
   3731	 * been initialized and the driver needs to re-init it. Check
   3732	 * mqd->cp_hqd_pq_control, since that value should not be 0.
   3733	 */
   3734	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
   3735	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
   3736		/* for the GPU_RESET case, reset the MQD to a clean state */
   3737		if (adev->gfx.mec.mqd_backup[mqd_idx])
   3738			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
   3739
   3740		/* reset ring buffer */
   3741		ring->wptr = 0;
   3742		amdgpu_ring_clear_ring(ring);
   3743
   3744		mutex_lock(&adev->srbm_mutex);
   3745		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   3746		gfx_v9_0_kiq_init_register(ring);
   3747		soc15_grbm_select(adev, 0, 0, 0, 0);
   3748		mutex_unlock(&adev->srbm_mutex);
   3749	} else {
   3750		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
   3751		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
   3752		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
   3753		mutex_lock(&adev->srbm_mutex);
   3754		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   3755		gfx_v9_0_mqd_init(ring);
   3756		gfx_v9_0_kiq_init_register(ring);
   3757		soc15_grbm_select(adev, 0, 0, 0, 0);
   3758		mutex_unlock(&adev->srbm_mutex);
   3759
   3760		if (adev->gfx.mec.mqd_backup[mqd_idx])
   3761			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
   3762	}
   3763
   3764	return 0;
   3765}
   3766
   3767static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
   3768{
   3769	struct amdgpu_device *adev = ring->adev;
   3770	struct v9_mqd *mqd = ring->mqd_ptr;
   3771	int mqd_idx = ring - &adev->gfx.compute_ring[0];
   3772	struct v9_mqd *tmp_mqd;
   3773
   3774	/* Same as the KIQ init above: the driver needs to re-init the MQD
   3775	 * if mqd->cp_hqd_pq_control was never initialized.
   3776	 */
   3777	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
   3778
   3779	if (!tmp_mqd->cp_hqd_pq_control ||
   3780	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
   3781		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
   3782		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
   3783		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
   3784		mutex_lock(&adev->srbm_mutex);
   3785		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   3786		gfx_v9_0_mqd_init(ring);
   3787		soc15_grbm_select(adev, 0, 0, 0, 0);
   3788		mutex_unlock(&adev->srbm_mutex);
   3789
   3790		if (adev->gfx.mec.mqd_backup[mqd_idx])
   3791			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
   3792	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
   3793		/* reset MQD to a clean status */
   3794		if (adev->gfx.mec.mqd_backup[mqd_idx])
   3795			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
   3796
   3797		/* reset ring buffer */
   3798		ring->wptr = 0;
   3799		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
   3800		amdgpu_ring_clear_ring(ring);
   3801	} else {
   3802		amdgpu_ring_clear_ring(ring);
   3803	}
   3804
   3805	return 0;
   3806}
   3807
   3808static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
   3809{
   3810	struct amdgpu_ring *ring;
   3811	int r;
   3812
   3813	ring = &adev->gfx.kiq.ring;
   3814
   3815	r = amdgpu_bo_reserve(ring->mqd_obj, false);
   3816	if (unlikely(r != 0))
   3817		return r;
   3818
   3819	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
   3820	if (unlikely(r != 0))
   3821		return r;
   3822
   3823	gfx_v9_0_kiq_init_queue(ring);
   3824	amdgpu_bo_kunmap(ring->mqd_obj);
   3825	ring->mqd_ptr = NULL;
   3826	amdgpu_bo_unreserve(ring->mqd_obj);
   3827	ring->sched.ready = true;
   3828	return 0;
   3829}
   3830
   3831static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
   3832{
   3833	struct amdgpu_ring *ring = NULL;
   3834	int r = 0, i;
   3835
   3836	gfx_v9_0_cp_compute_enable(adev, true);
   3837
   3838	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   3839		ring = &adev->gfx.compute_ring[i];
   3840
   3841		r = amdgpu_bo_reserve(ring->mqd_obj, false);
   3842		if (unlikely(r != 0))
   3843			goto done;
   3844		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
   3845		if (!r) {
   3846			r = gfx_v9_0_kcq_init_queue(ring);
   3847			amdgpu_bo_kunmap(ring->mqd_obj);
   3848			ring->mqd_ptr = NULL;
   3849		}
   3850		amdgpu_bo_unreserve(ring->mqd_obj);
   3851		if (r)
   3852			goto done;
   3853	}
   3854
   3855	r = amdgpu_gfx_enable_kcq(adev);
   3856done:
   3857	return r;
   3858}
   3859
   3860static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
   3861{
   3862	int r, i;
   3863	struct amdgpu_ring *ring;
   3864
   3865	if (!(adev->flags & AMD_IS_APU))
   3866		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
   3867
   3868	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
   3869		if (adev->gfx.num_gfx_rings) {
   3870			/* legacy firmware loading */
   3871			r = gfx_v9_0_cp_gfx_load_microcode(adev);
   3872			if (r)
   3873				return r;
   3874		}
   3875
   3876		r = gfx_v9_0_cp_compute_load_microcode(adev);
   3877		if (r)
   3878			return r;
   3879	}
   3880
   3881	r = gfx_v9_0_kiq_resume(adev);
   3882	if (r)
   3883		return r;
   3884
   3885	if (adev->gfx.num_gfx_rings) {
   3886		r = gfx_v9_0_cp_gfx_resume(adev);
   3887		if (r)
   3888			return r;
   3889	}
   3890
   3891	r = gfx_v9_0_kcq_resume(adev);
   3892	if (r)
   3893		return r;
   3894
   3895	if (adev->gfx.num_gfx_rings) {
   3896		ring = &adev->gfx.gfx_ring[0];
   3897		r = amdgpu_ring_test_helper(ring);
   3898		if (r)
   3899			return r;
   3900	}
   3901
   3902	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   3903		ring = &adev->gfx.compute_ring[i];
   3904		amdgpu_ring_test_helper(ring);
   3905	}
   3906
   3907	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
   3908
   3909	return 0;
   3910}
   3911
   3912static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
   3913{
   3914	u32 tmp;
   3915
   3916	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
   3917	    adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
   3918		return;
   3919
   3920	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
   3921	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
   3922				adev->df.hash_status.hash_64k);
   3923	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
   3924				adev->df.hash_status.hash_2m);
   3925	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
   3926				adev->df.hash_status.hash_1g);
   3927	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
   3928}
   3929
   3930static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
   3931{
   3932	if (adev->gfx.num_gfx_rings)
   3933		gfx_v9_0_cp_gfx_enable(adev, enable);
   3934	gfx_v9_0_cp_compute_enable(adev, enable);
   3935}
   3936
   3937static int gfx_v9_0_hw_init(void *handle)
   3938{
   3939	int r;
   3940	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   3941
   3942	if (!amdgpu_sriov_vf(adev))
   3943		gfx_v9_0_init_golden_registers(adev);
   3944
   3945	gfx_v9_0_constants_init(adev);
   3946
   3947	gfx_v9_0_init_tcp_config(adev);
   3948
   3949	r = adev->gfx.rlc.funcs->resume(adev);
   3950	if (r)
   3951		return r;
   3952
   3953	r = gfx_v9_0_cp_resume(adev);
   3954	if (r)
   3955		return r;
   3956
   3957	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
   3958		gfx_v9_4_2_set_power_brake_sequence(adev);
   3959
   3960	return r;
   3961}
   3962
   3963static int gfx_v9_0_hw_fini(void *handle)
   3964{
   3965	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   3966
   3967	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
   3968	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
   3969	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
   3970
   3971	/* with a RAS interrupt triggered, DF freeze and KCQ disable would fail */
   3972	if (!amdgpu_ras_intr_triggered())
   3973		/* disable KCQ to avoid CPC touch memory not valid anymore */
   3974		amdgpu_gfx_disable_kcq(adev);
   3975
   3976	if (amdgpu_sriov_vf(adev)) {
   3977		gfx_v9_0_cp_gfx_enable(adev, false);
   3978		/* must disable polling for SRIOV when the hw is finished;
   3979		 * otherwise the CPC engine may keep fetching a WB address that
   3980		 * is no longer valid after the sw teardown and trigger a DMAR
   3981		 * read error on the hypervisor side.
   3982		 */
   3983		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
   3984		return 0;
   3985	}
   3986
   3987	/* Use the deinitialize sequence from CAIL when unbinding the device
   3988	 * from the driver; otherwise the KIQ hangs when binding back.
   3989	 */
   3990	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
   3991		mutex_lock(&adev->srbm_mutex);
   3992		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
   3993				adev->gfx.kiq.ring.pipe,
   3994				adev->gfx.kiq.ring.queue, 0);
   3995		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
   3996		soc15_grbm_select(adev, 0, 0, 0, 0);
   3997		mutex_unlock(&adev->srbm_mutex);
   3998	}
   3999
   4000	gfx_v9_0_cp_enable(adev, false);
   4001
   4002	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
   4003	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
   4004	    (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
   4005		dev_dbg(adev->dev, "Skipping RLC halt\n");
   4006		return 0;
   4007	}
   4008
   4009	adev->gfx.rlc.funcs->stop(adev);
   4010	return 0;
   4011}
   4012
   4013static int gfx_v9_0_suspend(void *handle)
   4014{
   4015	return gfx_v9_0_hw_fini(handle);
   4016}
   4017
   4018static int gfx_v9_0_resume(void *handle)
   4019{
   4020	return gfx_v9_0_hw_init(handle);
   4021}
   4022
   4023static bool gfx_v9_0_is_idle(void *handle)
   4024{
   4025	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4026
   4027	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
   4028				GRBM_STATUS, GUI_ACTIVE))
   4029		return false;
   4030	else
   4031		return true;
   4032}
   4033
   4034static int gfx_v9_0_wait_for_idle(void *handle)
   4035{
   4036	unsigned i;
   4037	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4038
   4039	for (i = 0; i < adev->usec_timeout; i++) {
   4040		if (gfx_v9_0_is_idle(handle))
   4041			return 0;
   4042		udelay(1);
   4043	}
   4044	return -ETIMEDOUT;
   4045}
   4046
   4047static int gfx_v9_0_soft_reset(void *handle)
   4048{
   4049	u32 grbm_soft_reset = 0;
   4050	u32 tmp;
   4051	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4052
   4053	/* GRBM_STATUS */
   4054	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
   4055	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
   4056		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
   4057		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
   4058		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
   4059		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
   4060		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
   4061		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4062						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
   4063		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4064						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
   4065	}
   4066
   4067	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
   4068		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4069						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
   4070	}
   4071
   4072	/* GRBM_STATUS2 */
   4073	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
   4074	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
   4075		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4076						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
   4077
   4078
   4079	if (grbm_soft_reset) {
   4080		/* stop the rlc */
   4081		adev->gfx.rlc.funcs->stop(adev);
   4082
   4083		if (adev->gfx.num_gfx_rings)
   4084			/* Disable GFX parsing/prefetching */
   4085			gfx_v9_0_cp_gfx_enable(adev, false);
   4086
   4087		/* Disable MEC parsing/prefetching */
   4088		gfx_v9_0_cp_compute_enable(adev, false);
   4089
   4090		if (grbm_soft_reset) {
   4091			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
   4092			tmp |= grbm_soft_reset;
   4093			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
   4094			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
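       			/* Note (added): read back to post the soft-reset
       			 * write before the delay.
       			 */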
   4095			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
   4096
   4097			udelay(50);
   4098
   4099			tmp &= ~grbm_soft_reset;
   4100			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
   4101			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
   4102		}
   4103
   4104		/* Wait a little for things to settle down */
   4105		udelay(50);
   4106	}
   4107	return 0;
   4108}
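
/*
 * A minimal sketch (not part of this file) of the soft-reset pulse the
 * function above performs: assert the requested GRBM reset bits, read
 * back to post the write, let the reset settle, then deassert. The
 * helper name is hypothetical; the accessors and register are the ones
 * used above.
 */
static void grbm_pulse_soft_reset(struct amdgpu_device *adev, u32 mask)
{
	u32 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

	WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp | mask);
	RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);	/* post the write */
	udelay(50);				/* let the reset assert */
	WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp & ~mask);
	RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);	/* post the deassert */
}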
   4109
   4110static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
   4111{
   4112	signed long r, cnt = 0;
   4113	unsigned long flags;
   4114	uint32_t seq, reg_val_offs = 0;
   4115	uint64_t value = 0;
   4116	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
   4117	struct amdgpu_ring *ring = &kiq->ring;
   4118
   4119	BUG_ON(!ring->funcs->emit_rreg);
   4120
   4121	spin_lock_irqsave(&kiq->ring_lock, flags);
   4122	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
   4123		pr_err("critical bug! too many kiq readers\n");
   4124		goto failed_unlock;
   4125	}
   4126	amdgpu_ring_alloc(ring, 32);
   4127	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
    4128	amdgpu_ring_write(ring, 9 |	/* src: register */
   4129				(5 << 8) |	/* dst: memory */
   4130				(1 << 16) |	/* count sel */
   4131				(1 << 20));	/* write confirm */
   4132	amdgpu_ring_write(ring, 0);
   4133	amdgpu_ring_write(ring, 0);
   4134	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
   4135				reg_val_offs * 4));
   4136	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
   4137				reg_val_offs * 4));
   4138	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
   4139	if (r)
   4140		goto failed_undo;
   4141
   4142	amdgpu_ring_commit(ring);
   4143	spin_unlock_irqrestore(&kiq->ring_lock, flags);
   4144
   4145	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
   4146
    4147	/* Don't wait any longer in the GPU reset case, since that may
    4148	 * block the gpu_recover() routine forever: this virt_kiq_rreg
    4149	 * is triggered in TTM, and ttm_bo_lock_delayed_workqueue() will
    4150	 * never return if we keep waiting in virt_kiq_rreg, which causes
    4151	 * gpu_recover() to hang there.
    4152	 *
    4153	 * Also don't wait any longer when called from IRQ context.
    4154	 */
   4155	if (r < 1 && (amdgpu_in_reset(adev)))
   4156		goto failed_kiq_read;
   4157
   4158	might_sleep();
   4159	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
   4160		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
   4161		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
   4162	}
   4163
   4164	if (cnt > MAX_KIQ_REG_TRY)
   4165		goto failed_kiq_read;
   4166
   4167	mb();
   4168	value = (uint64_t)adev->wb.wb[reg_val_offs] |
    4169		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
   4170	amdgpu_device_wb_free(adev, reg_val_offs);
   4171	return value;
   4172
   4173failed_undo:
   4174	amdgpu_ring_undo(ring);
   4175failed_unlock:
   4176	spin_unlock_irqrestore(&kiq->ring_lock, flags);
   4177failed_kiq_read:
   4178	if (reg_val_offs)
   4179		amdgpu_device_wb_free(adev, reg_val_offs);
   4180	pr_err("failed to read gpu clock\n");
   4181	return ~0;
   4182}
   4183
   4184static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
   4185{
   4186	uint64_t clock, clock_lo, clock_hi, hi_check;
   4187
   4188	switch (adev->ip_versions[GC_HWIP][0]) {
   4189	case IP_VERSION(9, 3, 0):
   4190		preempt_disable();
   4191		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
   4192		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
   4193		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
    4194	/* The SMUIO TSC clock runs at 100MHz, so the lower 32 bits wrap
    4195	 * (carry into the upper half) roughly every 42 seconds.
    4196	 */
   4197		if (hi_check != clock_hi) {
   4198			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
   4199			clock_hi = hi_check;
   4200		}
   4201		preempt_enable();
   4202		clock = clock_lo | (clock_hi << 32ULL);
   4203		break;
   4204	default:
   4205		amdgpu_gfx_off_ctrl(adev, false);
   4206		mutex_lock(&adev->gfx.gpu_clock_mutex);
   4207		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
   4208			clock = gfx_v9_0_kiq_read_clock(adev);
   4209		} else {
   4210			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
   4211			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
   4212				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
   4213		}
   4214		mutex_unlock(&adev->gfx.gpu_clock_mutex);
   4215		amdgpu_gfx_off_ctrl(adev, true);
   4216		break;
   4217	}
   4218	return clock;
   4219}
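
/*
 * The IP_VERSION(9, 3, 0) branch above uses the classic split-counter
 * read: sample the upper half, then the lower, then the upper again;
 * if the upper half changed, a carry happened mid-read and the lower
 * half must be re-sampled. A sketch (not in the driver) generalizing
 * that single retry into a loop:
 */
static uint64_t gfx_v9_0_read_golden_tsc(struct amdgpu_device *adev)
{
	uint32_t hi, lo;

	do {
		hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
		lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
	} while (hi != RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir));

	/* hi was stable across the lo read, so no wrap tore the value */
	return ((uint64_t)hi << 32) | lo;
}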
   4220
   4221static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
   4222					  uint32_t vmid,
   4223					  uint32_t gds_base, uint32_t gds_size,
   4224					  uint32_t gws_base, uint32_t gws_size,
   4225					  uint32_t oa_base, uint32_t oa_size)
   4226{
   4227	struct amdgpu_device *adev = ring->adev;
   4228
   4229	/* GDS Base */
   4230	gfx_v9_0_write_data_to_reg(ring, 0, false,
   4231				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
   4232				   gds_base);
   4233
   4234	/* GDS Size */
   4235	gfx_v9_0_write_data_to_reg(ring, 0, false,
   4236				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
   4237				   gds_size);
   4238
   4239	/* GWS */
   4240	gfx_v9_0_write_data_to_reg(ring, 0, false,
   4241				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
   4242				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
   4243
   4244	/* OA */
   4245	gfx_v9_0_write_data_to_reg(ring, 0, false,
   4246				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
   4247				   (1 << (oa_size + oa_base)) - (1 << oa_base));
   4248}
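
/*
 * Worked example for the OA mask written above: with oa_base = 2 and
 * oa_size = 3, (1 << (3 + 2)) - (1 << 2) = 32 - 4 = 28 = 0b11100,
 * i.e. a contiguous run of oa_size bits starting at bit oa_base.
 */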
   4249
   4250static const u32 vgpr_init_compute_shader[] =
   4251{
   4252	0xb07c0000, 0xbe8000ff,
   4253	0x000000f8, 0xbf110800,
   4254	0x7e000280, 0x7e020280,
   4255	0x7e040280, 0x7e060280,
   4256	0x7e080280, 0x7e0a0280,
   4257	0x7e0c0280, 0x7e0e0280,
   4258	0x80808800, 0xbe803200,
   4259	0xbf84fff5, 0xbf9c0000,
   4260	0xd28c0001, 0x0001007f,
   4261	0xd28d0001, 0x0002027e,
   4262	0x10020288, 0xb8810904,
   4263	0xb7814000, 0xd1196a01,
   4264	0x00000301, 0xbe800087,
   4265	0xbefc00c1, 0xd89c4000,
   4266	0x00020201, 0xd89cc080,
   4267	0x00040401, 0x320202ff,
   4268	0x00000800, 0x80808100,
   4269	0xbf84fff8, 0x7e020280,
   4270	0xbf810000, 0x00000000,
   4271};
   4272
   4273static const u32 sgpr_init_compute_shader[] =
   4274{
   4275	0xb07c0000, 0xbe8000ff,
   4276	0x0000005f, 0xbee50080,
   4277	0xbe812c65, 0xbe822c65,
   4278	0xbe832c65, 0xbe842c65,
   4279	0xbe852c65, 0xb77c0005,
   4280	0x80808500, 0xbf84fff8,
   4281	0xbe800080, 0xbf810000,
   4282};
   4283
   4284static const u32 vgpr_init_compute_shader_arcturus[] = {
   4285	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
   4286	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
   4287	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
   4288	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
   4289	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
   4290	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
   4291	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
   4292	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
   4293	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
   4294	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
   4295	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
   4296	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
   4297	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
   4298	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
   4299	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
   4300	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
   4301	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
   4302	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
   4303	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
   4304	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
   4305	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
   4306	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
   4307	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
   4308	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
   4309	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
   4310	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
   4311	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
   4312	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
   4313	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
   4314	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
   4315	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
   4316	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
   4317	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
   4318	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
   4319	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
   4320	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
   4321	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
   4322	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
   4323	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
   4324	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
   4325	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
   4326	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
   4327	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
   4328	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
   4329	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
   4330	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
   4331	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
   4332	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
   4333	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
   4334	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
   4335	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
   4336	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
   4337	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
   4338	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
   4339	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
   4340	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
   4341	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
   4342	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
   4343	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
   4344	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
   4345	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
   4346	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
   4347	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
   4348	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
   4349	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
   4350	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
   4351	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
   4352	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
   4353	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
   4354	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
   4355	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
   4356	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
   4357	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
   4358	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
   4359	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
   4360	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
   4361	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
   4362	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
   4363	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
   4364	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
   4365	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
   4366	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
   4367	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
   4368	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
   4369	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
   4370	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
   4371	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
   4372	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
   4373	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
   4374	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
   4375	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
   4376	0xbf84fff8, 0xbf810000,
   4377};
   4378
    4379	/* When the register arrays below are changed, please update gpr_reg_size
    4380	 * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds so that
    4381	 * they still cover all gfx9 ASICs. */
   4382static const struct soc15_reg_entry vgpr_init_regs[] = {
   4383   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   4384   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   4385   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
   4386   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   4387   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
   4388   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
   4389   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
   4390   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
   4391   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
   4392   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
   4393   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
   4394   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
   4395   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
   4396   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
   4397};
   4398
   4399static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
   4400   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   4401   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   4402   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
   4403   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   4404   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
   4405   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
   4406   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
   4407   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
   4408   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
   4409   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
   4410   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
   4411   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
   4412   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
   4413   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
   4414};
   4415
   4416static const struct soc15_reg_entry sgpr1_init_regs[] = {
   4417   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   4418   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   4419   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
   4420   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   4421   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
   4422   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
   4423   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
   4424   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
   4425   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
   4426   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
   4427   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
   4428   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
   4429   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
   4430   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
   4431};
   4432
   4433static const struct soc15_reg_entry sgpr2_init_regs[] = {
   4434   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   4435   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   4436   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
   4437   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   4438   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
   4439   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
   4440   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
   4441   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
   4442   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
   4443   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
   4444   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
   4445   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
   4446   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
   4447   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
   4448};
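
/*
 * Per the comment above these arrays, gpr_reg_size in
 * gfx_v9_0_do_edc_gpr_workarounds must track their length. A sketch
 * (not in the driver; the function name is hypothetical) of a
 * compile-time guard against the arrays drifting apart. gpr_reg_size
 * itself is computed at runtime, so this only keeps the four arrays
 * consistent with each other:
 */
static inline void gfx_v9_0_check_gpr_init_arrays(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(vgpr_init_regs) !=
		     ARRAY_SIZE(vgpr_init_regs_arcturus));
	BUILD_BUG_ON(ARRAY_SIZE(vgpr_init_regs) !=
		     ARRAY_SIZE(sgpr1_init_regs));
	BUILD_BUG_ON(ARRAY_SIZE(sgpr1_init_regs) !=
		     ARRAY_SIZE(sgpr2_init_regs));
}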
   4449
   4450static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
   4451   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
   4452   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
   4453   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
   4454   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
   4455   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
   4456   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
   4457   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
   4458   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
   4459   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
   4460   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
   4461   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
   4462   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
   4463   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
   4464   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
   4465   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
   4466   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
   4467   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
   4468   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
   4469   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
   4470   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
   4471   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
   4472   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
   4473   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
   4474   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
   4475   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
   4476   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
   4477   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
   4478   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
   4479   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
   4480   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
   4481   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
   4482   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
   4483   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
   4484};
   4485
   4486static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
   4487{
   4488	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
   4489	int i, r;
   4490
    4491	/* only supported when RAS is enabled */
   4492	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
   4493		return 0;
   4494
   4495	r = amdgpu_ring_alloc(ring, 7);
   4496	if (r) {
   4497		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
   4498			ring->name, r);
   4499		return r;
   4500	}
   4501
   4502	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
   4503	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
   4504
   4505	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
   4506	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
   4507				PACKET3_DMA_DATA_DST_SEL(1) |
   4508				PACKET3_DMA_DATA_SRC_SEL(2) |
   4509				PACKET3_DMA_DATA_ENGINE(0)));
   4510	amdgpu_ring_write(ring, 0);
   4511	amdgpu_ring_write(ring, 0);
   4512	amdgpu_ring_write(ring, 0);
   4513	amdgpu_ring_write(ring, 0);
   4514	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
   4515				adev->gds.gds_size);
   4516
   4517	amdgpu_ring_commit(ring);
   4518
   4519	for (i = 0; i < adev->usec_timeout; i++) {
   4520		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
   4521			break;
   4522		udelay(1);
   4523	}
   4524
   4525	if (i >= adev->usec_timeout)
   4526		r = -ETIMEDOUT;
   4527
   4528	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
   4529
   4530	return r;
   4531}
   4532
   4533static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
   4534{
   4535	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
   4536	struct amdgpu_ib ib;
   4537	struct dma_fence *f = NULL;
   4538	int r, i;
   4539	unsigned total_size, vgpr_offset, sgpr_offset;
   4540	u64 gpu_addr;
   4541
   4542	int compute_dim_x = adev->gfx.config.max_shader_engines *
   4543						adev->gfx.config.max_cu_per_sh *
   4544						adev->gfx.config.max_sh_per_se;
   4545	int sgpr_work_group_size = 5;
   4546	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
   4547	int vgpr_init_shader_size;
   4548	const u32 *vgpr_init_shader_ptr;
   4549	const struct soc15_reg_entry *vgpr_init_regs_ptr;
   4550
    4551	/* only supported when RAS is enabled */
   4552	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
   4553		return 0;
   4554
   4555	/* bail if the compute ring is not ready */
   4556	if (!ring->sched.ready)
   4557		return 0;
   4558
   4559	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
   4560		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
   4561		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
   4562		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
   4563	} else {
   4564		vgpr_init_shader_ptr = vgpr_init_compute_shader;
   4565		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
   4566		vgpr_init_regs_ptr = vgpr_init_regs;
   4567	}
   4568
   4569	total_size =
   4570		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
   4571	total_size +=
   4572		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
   4573	total_size +=
   4574		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
   4575	total_size = ALIGN(total_size, 256);
   4576	vgpr_offset = total_size;
   4577	total_size += ALIGN(vgpr_init_shader_size, 256);
   4578	sgpr_offset = total_size;
   4579	total_size += sizeof(sgpr_init_compute_shader);
   4580
   4581	/* allocate an indirect buffer to put the commands in */
   4582	memset(&ib, 0, sizeof(ib));
   4583	r = amdgpu_ib_get(adev, NULL, total_size,
   4584					AMDGPU_IB_POOL_DIRECT, &ib);
   4585	if (r) {
   4586		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
   4587		return r;
   4588	}
   4589
   4590	/* load the compute shaders */
   4591	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
   4592		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
   4593
   4594	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
   4595		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
   4596
   4597	/* init the ib length to 0 */
   4598	ib.length_dw = 0;
   4599
   4600	/* VGPR */
   4601	/* write the register state for the compute dispatch */
   4602	for (i = 0; i < gpr_reg_size; i++) {
   4603		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
   4604		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
   4605								- PACKET3_SET_SH_REG_START;
   4606		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
   4607	}
   4608	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
   4609	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
   4610	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
   4611	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
   4612							- PACKET3_SET_SH_REG_START;
   4613	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
   4614	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
   4615
   4616	/* write dispatch packet */
   4617	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   4618	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
   4619	ib.ptr[ib.length_dw++] = 1; /* y */
   4620	ib.ptr[ib.length_dw++] = 1; /* z */
   4621	ib.ptr[ib.length_dw++] =
   4622		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
   4623
   4624	/* write CS partial flush packet */
   4625	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
   4626	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
   4627
   4628	/* SGPR1 */
   4629	/* write the register state for the compute dispatch */
   4630	for (i = 0; i < gpr_reg_size; i++) {
   4631		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
   4632		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
   4633								- PACKET3_SET_SH_REG_START;
   4634		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
   4635	}
   4636	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
   4637	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
   4638	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
   4639	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
   4640							- PACKET3_SET_SH_REG_START;
   4641	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
   4642	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
   4643
   4644	/* write dispatch packet */
   4645	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   4646	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
   4647	ib.ptr[ib.length_dw++] = 1; /* y */
   4648	ib.ptr[ib.length_dw++] = 1; /* z */
   4649	ib.ptr[ib.length_dw++] =
   4650		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
   4651
   4652	/* write CS partial flush packet */
   4653	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
   4654	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
   4655
   4656	/* SGPR2 */
   4657	/* write the register state for the compute dispatch */
   4658	for (i = 0; i < gpr_reg_size; i++) {
   4659		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
   4660		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
   4661								- PACKET3_SET_SH_REG_START;
   4662		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
   4663	}
   4664	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
   4665	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
   4666	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
   4667	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
   4668							- PACKET3_SET_SH_REG_START;
   4669	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
   4670	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
   4671
   4672	/* write dispatch packet */
   4673	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   4674	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
   4675	ib.ptr[ib.length_dw++] = 1; /* y */
   4676	ib.ptr[ib.length_dw++] = 1; /* z */
   4677	ib.ptr[ib.length_dw++] =
   4678		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
   4679
   4680	/* write CS partial flush packet */
   4681	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
   4682	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
   4683
    4684	/* schedule the ib on the ring */
   4685	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
   4686	if (r) {
   4687		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
   4688		goto fail;
   4689	}
   4690
   4691	/* wait for the GPU to finish processing the IB */
   4692	r = dma_fence_wait(f, false);
   4693	if (r) {
   4694		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
   4695		goto fail;
   4696	}
   4697
   4698fail:
   4699	amdgpu_ib_free(adev, &ib, NULL);
   4700	dma_fence_put(f);
   4701
   4702	return r;
   4703}
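
/*
 * The three dispatch blocks above share the same PM4 shape. A sketch
 * (hypothetical helper, not in the driver) of how the SET_SH_REG loop
 * could be factored out:
 */
static void gfx_v9_0_ib_set_sh_regs(struct amdgpu_ib *ib,
				    const struct soc15_reg_entry *regs,
				    int count)
{
	int i;

	for (i = 0; i < count; i++) {
		ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(regs[i]) -
					   PACKET3_SET_SH_REG_START;
		ib->ptr[ib->length_dw++] = regs[i].reg_value;
	}
}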
   4704
   4705static int gfx_v9_0_early_init(void *handle)
   4706{
   4707	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4708
   4709	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
   4710	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
   4711		adev->gfx.num_gfx_rings = 0;
   4712	else
   4713		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
   4714	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
   4715					  AMDGPU_MAX_COMPUTE_RINGS);
   4716	gfx_v9_0_set_kiq_pm4_funcs(adev);
   4717	gfx_v9_0_set_ring_funcs(adev);
   4718	gfx_v9_0_set_irq_funcs(adev);
   4719	gfx_v9_0_set_gds_init(adev);
   4720	gfx_v9_0_set_rlc_funcs(adev);
   4721
   4722	/* init rlcg reg access ctrl */
   4723	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
   4724
   4725	return 0;
   4726}
   4727
   4728static int gfx_v9_0_ecc_late_init(void *handle)
   4729{
   4730	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4731	int r;
   4732
    4733	/*
    4734	 * Temporary workaround for an issue where CP firmware fails to
    4735	 * update the read pointer when CPDMA writes the GDS clearing
    4736	 * operation during the suspend/resume sequence on several
    4737	 * cards. So limit this operation to the cold boot sequence.
    4738	 */
   4739	if ((!adev->in_suspend) &&
   4740	    (adev->gds.gds_size)) {
   4741		r = gfx_v9_0_do_edc_gds_workarounds(adev);
   4742		if (r)
   4743			return r;
   4744	}
   4745
   4746	/* requires IBs so do in late init after IB pool is initialized */
   4747	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
   4748		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
   4749	else
   4750		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
   4751
   4752	if (r)
   4753		return r;
   4754
   4755	if (adev->gfx.ras &&
   4756	    adev->gfx.ras->enable_watchdog_timer)
   4757		adev->gfx.ras->enable_watchdog_timer(adev);
   4758
   4759	return 0;
   4760}
   4761
   4762static int gfx_v9_0_late_init(void *handle)
   4763{
   4764	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4765	int r;
   4766
   4767	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
   4768	if (r)
   4769		return r;
   4770
   4771	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
   4772	if (r)
   4773		return r;
   4774
   4775	r = gfx_v9_0_ecc_late_init(handle);
   4776	if (r)
   4777		return r;
   4778
   4779	return 0;
   4780}
   4781
   4782static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
   4783{
   4784	uint32_t rlc_setting;
   4785
   4786	/* if RLC is not enabled, do nothing */
   4787	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
   4788	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
   4789		return false;
   4790
   4791	return true;
   4792}
   4793
   4794static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
   4795{
   4796	uint32_t data;
   4797	unsigned i;
   4798
   4799	data = RLC_SAFE_MODE__CMD_MASK;
   4800	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
   4801	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
   4802
   4803	/* wait for RLC_SAFE_MODE */
   4804	for (i = 0; i < adev->usec_timeout; i++) {
   4805		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
   4806			break;
   4807		udelay(1);
   4808	}
   4809}
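
/*
 * gfx_v9_0_set_safe_mode above is a request/ack handshake: the driver
 * writes CMD plus a MESSAGE, and the RLC clears the CMD field once it
 * has taken the request, so the loop polls CMD back to zero. A sketch
 * (not in the driver) of the ack predicate:
 */
static bool gfx_v9_0_safe_mode_acked(struct amdgpu_device *adev)
{
	return !REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
			      RLC_SAFE_MODE, CMD);
}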
   4810
   4811static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
   4812{
   4813	uint32_t data;
   4814
   4815	data = RLC_SAFE_MODE__CMD_MASK;
   4816	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
   4817}
   4818
   4819static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
   4820						bool enable)
   4821{
   4822	amdgpu_gfx_rlc_enter_safe_mode(adev);
   4823
   4824	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
   4825		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
   4826		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
   4827			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
   4828	} else {
   4829		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
   4830		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
   4831			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
   4832	}
   4833
   4834	amdgpu_gfx_rlc_exit_safe_mode(adev);
   4835}
   4836
   4837static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
   4838						bool enable)
   4839{
   4840	/* TODO: double check if we need to perform under safe mode */
   4841	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
   4842
   4843	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
   4844		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
   4845	else
   4846		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
   4847
   4848	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
   4849		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
   4850	else
   4851		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
   4852
   4853	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
   4854}
   4855
   4856static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
   4857						      bool enable)
   4858{
   4859	uint32_t data, def;
   4860
   4861	amdgpu_gfx_rlc_enter_safe_mode(adev);
   4862
   4863	/* It is disabled by HW by default */
   4864	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
   4865		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
   4866		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
   4867
   4868		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
   4869			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
   4870
   4871		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
   4872			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
   4873			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
   4874
   4875		/* only for Vega10 & Raven1 */
   4876		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
   4877
   4878		if (def != data)
   4879			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
   4880
   4881		/* MGLS is a global flag to control all MGLS in GFX */
   4882		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
   4883			/* 2 - RLC memory Light sleep */
   4884			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
   4885				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
   4886				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
   4887				if (def != data)
   4888					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
   4889			}
   4890			/* 3 - CP memory Light sleep */
   4891			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
   4892				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
   4893				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
   4894				if (def != data)
   4895					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
   4896			}
   4897		}
   4898	} else {
   4899		/* 1 - MGCG_OVERRIDE */
   4900		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
   4901
   4902		if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
   4903			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
   4904
   4905		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
   4906			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
   4907			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
   4908			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
   4909
   4910		if (def != data)
   4911			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
   4912
   4913		/* 2 - disable MGLS in RLC */
   4914		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
   4915		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
   4916			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
   4917			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
   4918		}
   4919
   4920		/* 3 - disable MGLS in CP */
   4921		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
   4922		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
   4923			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
   4924			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
   4925		}
   4926	}
   4927
   4928	amdgpu_gfx_rlc_exit_safe_mode(adev);
   4929}
   4930
   4931static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
   4932					   bool enable)
   4933{
   4934	uint32_t data, def;
   4935
   4936	if (!adev->gfx.num_gfx_rings)
   4937		return;
   4938
   4939	amdgpu_gfx_rlc_enter_safe_mode(adev);
   4940
   4941	/* Enable 3D CGCG/CGLS */
   4942	if (enable) {
   4943		/* write cmd to clear cgcg/cgls ov */
   4944		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
   4945		/* unset CGCG override */
   4946		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
   4947		/* update CGCG and CGLS override bits */
   4948		if (def != data)
   4949			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
   4950
   4951		/* enable 3Dcgcg FSM(0x0000363f) */
   4952		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
   4953
   4954		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
   4955			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
   4956				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
   4957		else
   4958			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
   4959
   4960		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
   4961			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
   4962				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
   4963		if (def != data)
   4964			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
   4965
   4966		/* set IDLE_POLL_COUNT(0x00900100) */
   4967		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
   4968		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
   4969			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
   4970		if (def != data)
   4971			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
   4972	} else {
   4973		/* Disable CGCG/CGLS */
   4974		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
   4975		/* disable cgcg, cgls should be disabled */
   4976		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
   4977			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
   4978		/* disable cgcg and cgls in FSM */
   4979		if (def != data)
   4980			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
   4981	}
   4982
   4983	amdgpu_gfx_rlc_exit_safe_mode(adev);
   4984}
   4985
   4986static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
   4987						      bool enable)
   4988{
   4989	uint32_t def, data;
   4990
   4991	amdgpu_gfx_rlc_enter_safe_mode(adev);
   4992
   4993	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
   4994		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
   4995		/* unset CGCG override */
   4996		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
   4997		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
   4998			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
   4999		else
   5000			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
   5001		/* update CGCG and CGLS override bits */
   5002		if (def != data)
   5003			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
   5004
   5005		/* enable cgcg FSM(0x0000363F) */
   5006		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
   5007
   5008		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
   5009			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
   5010				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
   5011		else
   5012			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
   5013				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
   5014		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
   5015			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
   5016				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
   5017		if (def != data)
   5018			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
   5019
   5020		/* set IDLE_POLL_COUNT(0x00900100) */
   5021		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
   5022		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
   5023			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
   5024		if (def != data)
   5025			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
   5026	} else {
   5027		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
   5028		/* reset CGCG/CGLS bits */
   5029		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
   5030		/* disable cgcg and cgls in FSM */
   5031		if (def != data)
   5032			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
   5033	}
   5034
   5035	amdgpu_gfx_rlc_exit_safe_mode(adev);
   5036}
   5037
   5038static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
   5039					    bool enable)
   5040{
   5041	if (enable) {
   5042		/* CGCG/CGLS should be enabled after MGCG/MGLS
   5043		 * ===  MGCG + MGLS ===
   5044		 */
   5045		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
   5046		/* ===  CGCG /CGLS for GFX 3D Only === */
   5047		gfx_v9_0_update_3d_clock_gating(adev, enable);
   5048		/* ===  CGCG + CGLS === */
   5049		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
   5050	} else {
   5051		/* CGCG/CGLS should be disabled before MGCG/MGLS
   5052		 * ===  CGCG + CGLS ===
   5053		 */
   5054		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
   5055		/* ===  CGCG /CGLS for GFX 3D Only === */
   5056		gfx_v9_0_update_3d_clock_gating(adev, enable);
   5057		/* ===  MGCG + MGLS === */
   5058		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
   5059	}
   5060	return 0;
   5061}
   5062
   5063static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
   5064{
   5065	u32 reg, data;
   5066
   5067	amdgpu_gfx_off_ctrl(adev, false);
   5068
   5069	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
   5070	if (amdgpu_sriov_is_pp_one_vf(adev))
   5071		data = RREG32_NO_KIQ(reg);
   5072	else
   5073		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
   5074
   5075	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
   5076	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
   5077
   5078	if (amdgpu_sriov_is_pp_one_vf(adev))
   5079		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
   5080	else
   5081		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
   5082
   5083	amdgpu_gfx_off_ctrl(adev, true);
   5084}
   5085
   5086static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
   5087					uint32_t offset,
   5088					struct soc15_reg_rlcg *entries, int arr_size)
   5089{
   5090	int i;
   5091	uint32_t reg;
   5092
   5093	if (!entries)
   5094		return false;
   5095
   5096	for (i = 0; i < arr_size; i++) {
   5097		const struct soc15_reg_rlcg *entry;
   5098
   5099		entry = &entries[i];
   5100		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
   5101		if (offset == reg)
   5102			return true;
   5103	}
   5104
   5105	return false;
   5106}
   5107
   5108static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
   5109{
   5110	return gfx_v9_0_check_rlcg_range(adev, offset,
   5111					(void *)rlcg_access_gc_9_0,
   5112					ARRAY_SIZE(rlcg_access_gc_9_0));
   5113}
   5114
   5115static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
   5116	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
   5117	.set_safe_mode = gfx_v9_0_set_safe_mode,
   5118	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
   5119	.init = gfx_v9_0_rlc_init,
   5120	.get_csb_size = gfx_v9_0_get_csb_size,
   5121	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
   5122	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
   5123	.resume = gfx_v9_0_rlc_resume,
   5124	.stop = gfx_v9_0_rlc_stop,
   5125	.reset = gfx_v9_0_rlc_reset,
   5126	.start = gfx_v9_0_rlc_start,
   5127	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
   5128	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
   5129};
   5130
   5131static int gfx_v9_0_set_powergating_state(void *handle,
   5132					  enum amd_powergating_state state)
   5133{
   5134	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5135	bool enable = (state == AMD_PG_STATE_GATE);
   5136
   5137	switch (adev->ip_versions[GC_HWIP][0]) {
   5138	case IP_VERSION(9, 2, 2):
   5139	case IP_VERSION(9, 1, 0):
   5140	case IP_VERSION(9, 3, 0):
   5141		if (!enable)
   5142			amdgpu_gfx_off_ctrl(adev, false);
   5143
   5144		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
   5145			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
   5146			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
   5147		} else {
   5148			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
   5149			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
   5150		}
   5151
   5152		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
   5153			gfx_v9_0_enable_cp_power_gating(adev, true);
   5154		else
   5155			gfx_v9_0_enable_cp_power_gating(adev, false);
   5156
   5157		/* update gfx cgpg state */
   5158		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
   5159
   5160		/* update mgcg state */
   5161		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
   5162
   5163		if (enable)
   5164			amdgpu_gfx_off_ctrl(adev, true);
   5165		break;
   5166	case IP_VERSION(9, 2, 1):
   5167		amdgpu_gfx_off_ctrl(adev, enable);
   5168		break;
   5169	default:
   5170		break;
   5171	}
   5172
   5173	return 0;
   5174}
   5175
   5176static int gfx_v9_0_set_clockgating_state(void *handle,
   5177					  enum amd_clockgating_state state)
   5178{
   5179	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5180
   5181	if (amdgpu_sriov_vf(adev))
   5182		return 0;
   5183
   5184	switch (adev->ip_versions[GC_HWIP][0]) {
   5185	case IP_VERSION(9, 0, 1):
   5186	case IP_VERSION(9, 2, 1):
   5187	case IP_VERSION(9, 4, 0):
   5188	case IP_VERSION(9, 2, 2):
   5189	case IP_VERSION(9, 1, 0):
   5190	case IP_VERSION(9, 4, 1):
   5191	case IP_VERSION(9, 3, 0):
   5192	case IP_VERSION(9, 4, 2):
   5193		gfx_v9_0_update_gfx_clock_gating(adev,
   5194						 state == AMD_CG_STATE_GATE);
   5195		break;
   5196	default:
   5197		break;
   5198	}
   5199	return 0;
   5200}
   5201
   5202static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
   5203{
   5204	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5205	int data;
   5206
   5207	if (amdgpu_sriov_vf(adev))
   5208		*flags = 0;
   5209
   5210	/* AMD_CG_SUPPORT_GFX_MGCG */
   5211	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
   5212	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
   5213		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
   5214
   5215	/* AMD_CG_SUPPORT_GFX_CGCG */
   5216	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
   5217	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
   5218		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
   5219
   5220	/* AMD_CG_SUPPORT_GFX_CGLS */
   5221	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
   5222		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
   5223
   5224	/* AMD_CG_SUPPORT_GFX_RLC_LS */
   5225	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
   5226	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
   5227		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
   5228
   5229	/* AMD_CG_SUPPORT_GFX_CP_LS */
   5230	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
   5231	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
   5232		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
   5233
   5234	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
   5235		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
   5236		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
   5237		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
   5238			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
   5239
   5240		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
   5241		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
   5242			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
   5243	}
   5244}
   5245
   5246static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
   5247{
    5248	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
   5249}
   5250
   5251static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
   5252{
   5253	struct amdgpu_device *adev = ring->adev;
   5254	u64 wptr;
   5255
   5256	/* XXX check if swapping is necessary on BE */
   5257	if (ring->use_doorbell) {
   5258		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
   5259	} else {
   5260		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
   5261		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
   5262	}
   5263
   5264	return wptr;
   5265}
   5266
   5267static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
   5268{
   5269	struct amdgpu_device *adev = ring->adev;
   5270
   5271	if (ring->use_doorbell) {
   5272		/* XXX check if swapping is necessary on BE */
   5273		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
   5274		WDOORBELL64(ring->doorbell_index, ring->wptr);
   5275	} else {
   5276		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
   5277		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
   5278	}
   5279}
   5280
   5281static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
   5282{
   5283	struct amdgpu_device *adev = ring->adev;
   5284	u32 ref_and_mask, reg_mem_engine;
   5285	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
   5286
   5287	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
   5288		switch (ring->me) {
   5289		case 1:
   5290			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
   5291			break;
   5292		case 2:
   5293			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
   5294			break;
   5295		default:
   5296			return;
   5297		}
   5298		reg_mem_engine = 0;
   5299	} else {
   5300		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
   5301		reg_mem_engine = 1; /* pfp */
   5302	}
   5303
   5304	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
   5305			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
   5306			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
   5307			      ref_and_mask, ref_and_mask, 0x20);
   5308}
   5309
   5310static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
   5311					struct amdgpu_job *job,
   5312					struct amdgpu_ib *ib,
   5313					uint32_t flags)
   5314{
   5315	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   5316	u32 header, control = 0;
   5317
   5318	if (ib->flags & AMDGPU_IB_FLAG_CE)
   5319		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
   5320	else
   5321		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
   5322
   5323	control |= ib->length_dw | (vmid << 24);
   5324
   5325	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
   5326		control |= INDIRECT_BUFFER_PRE_ENB(1);
   5327
   5328		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
   5329			gfx_v9_0_ring_emit_de_meta(ring);
   5330	}
   5331
   5332	amdgpu_ring_write(ring, header);
   5333	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
   5334	amdgpu_ring_write(ring,
   5335#ifdef __BIG_ENDIAN
   5336		(2 << 0) |
   5337#endif
   5338		lower_32_bits(ib->gpu_addr));
   5339	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
   5340	amdgpu_ring_write(ring, control);
   5341}
   5342
   5343static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
   5344					  struct amdgpu_job *job,
   5345					  struct amdgpu_ib *ib,
   5346					  uint32_t flags)
   5347{
   5348	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   5349	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
   5350
    5351	/* Currently there is a high probability of a wave ID mismatch
    5352	 * between ME and GDS, leading to a hw deadlock, because ME generates
    5353	 * different wave IDs than the GDS expects. This situation happens
    5354	 * randomly when at least 5 compute pipes use GDS ordered append.
    5355	 * The wave IDs generated by ME are also wrong after suspend/resume.
    5356	 * Those are probably bugs somewhere else in the kernel driver.
    5357	 *
    5358	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets the wave ID counters in ME
    5359	 * and GDS to 0 for this ring (me/pipe).
    5360	 */
   5361	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
   5362		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
   5363		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
   5364		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
   5365	}
   5366
   5367	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
   5368	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
   5369	amdgpu_ring_write(ring,
   5370#ifdef __BIG_ENDIAN
   5371				(2 << 0) |
   5372#endif
   5373				lower_32_bits(ib->gpu_addr));
   5374	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
   5375	amdgpu_ring_write(ring, control);
   5376}
   5377
   5378static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
   5379				     u64 seq, unsigned flags)
   5380{
   5381	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
   5382	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   5383	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
   5384
   5385	/* RELEASE_MEM - flush caches, send int */
   5386	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
   5387	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
   5388					       EOP_TC_NC_ACTION_EN) :
   5389					      (EOP_TCL1_ACTION_EN |
   5390					       EOP_TC_ACTION_EN |
   5391					       EOP_TC_WB_ACTION_EN |
   5392					       EOP_TC_MD_ACTION_EN)) |
   5393				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   5394				 EVENT_INDEX(5)));
   5395	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
   5396
    5397	/*
    5398	 * The address must be Qword aligned for a 64bit write, and Dword
    5399	 * aligned if we only send the low 32bit data (the high is discarded).
    5400	 */
   5401	if (write64bit)
   5402		BUG_ON(addr & 0x7);
   5403	else
   5404		BUG_ON(addr & 0x3);
   5405	amdgpu_ring_write(ring, lower_32_bits(addr));
   5406	amdgpu_ring_write(ring, upper_32_bits(addr));
   5407	amdgpu_ring_write(ring, lower_32_bits(seq));
   5408	amdgpu_ring_write(ring, upper_32_bits(seq));
   5409	amdgpu_ring_write(ring, 0);
   5410}
   5411
   5412static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
   5413{
   5414	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   5415	uint32_t seq = ring->fence_drv.sync_seq;
   5416	uint64_t addr = ring->fence_drv.gpu_addr;
   5417
   5418	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
   5419			      lower_32_bits(addr), upper_32_bits(addr),
   5420			      seq, 0xffffffff, 4);
   5421}
   5422
   5423static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
   5424					unsigned vmid, uint64_t pd_addr)
   5425{
   5426	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
   5427
   5428	/* compute doesn't have PFP */
   5429	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
   5430		/* sync PFP to ME, otherwise we might get invalid PFP reads */
   5431		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   5432		amdgpu_ring_write(ring, 0x0);
   5433	}
   5434}
   5435
   5436static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
   5437{
   5438	return *ring->rptr_cpu_addr; /* gfx9 hardware exposes a 32-bit rptr */
   5439}
   5440
   5441static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
   5442{
   5443	u64 wptr;
   5444
   5445	/* XXX check if swapping is necessary on BE */
   5446	if (ring->use_doorbell)
   5447		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
   5448	else
   5449		BUG();
   5450	return wptr;
   5451}
   5452
   5453static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
   5454{
   5455	struct amdgpu_device *adev = ring->adev;
   5456
   5457	/* XXX check if swapping is necessary on BE */
   5458	if (ring->use_doorbell) {
   5459		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
   5460		WDOORBELL64(ring->doorbell_index, ring->wptr);
   5461	} else {
   5462		BUG(); /* only DOORBELL method supported on gfx9 now */
   5463	}
   5464}
   5465
   5466static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
   5467					 u64 seq, unsigned int flags)
   5468{
   5469	struct amdgpu_device *adev = ring->adev;
   5470
   5471	/* we only allocate 32 bits for each seq wb address */
   5472	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
   5473
   5474	/* write fence seq to the "addr" */
   5475	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5476	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5477				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
   5478	amdgpu_ring_write(ring, lower_32_bits(addr));
   5479	amdgpu_ring_write(ring, upper_32_bits(addr));
   5480	amdgpu_ring_write(ring, lower_32_bits(seq));
   5481
   5482	if (flags & AMDGPU_FENCE_FLAG_INT) {
   5483		/* set register to trigger INT */
   5484		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5485		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5486					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
   5487		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
   5488		amdgpu_ring_write(ring, 0);
   5489		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
   5490	}
   5491}
   5492
   5493static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
   5494{
   5495	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   5496	amdgpu_ring_write(ring, 0);
   5497}
   5498
   5499static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
   5500{
   5501	struct v9_ce_ib_state ce_payload = {0};
   5502	uint64_t csa_addr;
   5503	int cnt;
   5504
   5505	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
   5506	csa_addr = amdgpu_csa_vaddr(ring->adev);
   5507
   5508	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
   5509	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
   5510				 WRITE_DATA_DST_SEL(8) |
   5511				 WR_CONFIRM) |
   5512				 WRITE_DATA_CACHE_POLICY(0));
   5513	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
   5514	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
   5515	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
   5516}
   5517
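/*
 * A small worked example (not driver code) of the "cnt" arithmetic in
 * the CE/DE metadata writers above, assuming the usual PM4 convention
 * that the header count field is the packet length in dwords minus
 * two: header + control + two address dwords + payload, hence
 * cnt = payload_dw + 4 - 2.
 */
#include <stddef.h>
#include <stdint.h>

static uint32_t write_data_count(size_t payload_bytes)
{
	size_t payload_dw = payload_bytes >> 2;	/* bytes -> dwords */

	return (uint32_t)(payload_dw + 4 - 2);
}
/* e.g. a 64-byte ce_payload gives cnt = 16 + 2 = 18 */
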
   5518static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
   5519{
   5520	struct v9_de_ib_state de_payload = {0};
   5521	uint64_t csa_addr, gds_addr;
   5522	int cnt;
   5523
   5524	csa_addr = amdgpu_csa_vaddr(ring->adev);
   5525	gds_addr = csa_addr + 4096;
   5526	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
   5527	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
   5528
   5529	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
   5530	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
   5531	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
   5532				 WRITE_DATA_DST_SEL(8) |
   5533				 WR_CONFIRM) |
   5534				 WRITE_DATA_CACHE_POLICY(0));
   5535	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
   5536	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
   5537	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
   5538}
   5539
   5540static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
   5541				   bool secure)
   5542{
   5543	uint32_t v = secure ? FRAME_TMZ : 0;
   5544
   5545	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
   5546	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
   5547}
   5548
   5549static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
   5550{
   5551	uint32_t dw2 = 0;
   5552
   5553	if (amdgpu_sriov_vf(ring->adev))
   5554		gfx_v9_0_ring_emit_ce_meta(ring);
   5555
   5556	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
   5557	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
   5558		/* set load_global_config & load_global_uconfig */
   5559		dw2 |= 0x8001;
   5560		/* set load_cs_sh_regs */
   5561		dw2 |= 0x01000000;
   5562		/* set load_per_context_state & load_gfx_sh_regs for GFX */
   5563		dw2 |= 0x10002;
   5564
   5565		/* set load_ce_ram if a preamble is presented */
   5566		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
   5567			dw2 |= 0x10000000;
   5568	} else {
   5569		/* still load_ce_ram if this is the first time the preamble is
   5570		 * presented, even though no context switch happens.
   5571		 */
   5572		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
   5573			dw2 |= 0x10000000;
   5574	}
   5575
   5576	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   5577	amdgpu_ring_write(ring, dw2);
   5578	amdgpu_ring_write(ring, 0);
   5579}
   5580
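/*
 * A worked example (standalone, with the flag values copied from the
 * function above) of the CONTEXT_CONTROL dw2 produced for a context
 * switch without a preamble IB:
 */
#include <stdint.h>

static uint32_t cntxcntl_dw2_ctx_switch(void)
{
	uint32_t dw2 = 0x80000000;	/* load_enable */

	dw2 |= 0x8001;		/* load_global_config | load_global_uconfig */
	dw2 |= 0x01000000;	/* load_cs_sh_regs */
	dw2 |= 0x10002;		/* load_per_context_state | load_gfx_sh_regs */
	return dw2;		/* == 0x81018003 */
}
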
   5581static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
   5582{
   5583	unsigned ret;
   5584	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
   5585	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
   5586	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
   5587	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
   5588	ret = ring->wptr & ring->buf_mask;
   5589	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
   5590	return ret;
   5591}
   5592
   5593static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
   5594{
   5595	unsigned cur;
   5596	BUG_ON(offset > ring->buf_mask);
   5597	BUG_ON(ring->ring[offset] != 0x55aa55aa);
   5598
   5599	cur = (ring->wptr & ring->buf_mask) - 1;
   5600	if (likely(cur > offset))
   5601		ring->ring[offset] = cur - offset;
   5602	else
   5603		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
   5604}
   5605
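/*
 * A standalone sketch (not driver code) of the COND_EXEC patch
 * arithmetic above: the dummy dword written by init_cond_exec is later
 * replaced with the number of dwords to skip, and the else-branch
 * handles the write pointer having wrapped past the end of the ring.
 */
#include <stdint.h>

static uint32_t cond_exec_patch(uint32_t offset, uint32_t cur,
				uint32_t ring_size_dw)
{
	/* offset and cur are dword indices already masked into the ring */
	if (cur > offset)
		return cur - offset;
	return ring_size_dw - offset + cur;	/* wptr wrapped around */
}
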
   5606static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
   5607				    uint32_t reg_val_offs)
   5608{
   5609	struct amdgpu_device *adev = ring->adev;
   5610
   5611	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
   5612	amdgpu_ring_write(ring, 0 |	/* src: register*/
   5613				(5 << 8) |	/* dst: memory */
   5614				(1 << 20));	/* write confirm */
   5615	amdgpu_ring_write(ring, reg);
   5616	amdgpu_ring_write(ring, 0);
   5617	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
   5618				reg_val_offs * 4));
   5619	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
   5620				reg_val_offs * 4));
   5621}
   5622
   5623static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
   5624				    uint32_t val)
   5625{
   5626	uint32_t cmd = 0;
   5627
   5628	switch (ring->funcs->type) {
   5629	case AMDGPU_RING_TYPE_GFX:
   5630		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
   5631		break;
   5632	case AMDGPU_RING_TYPE_KIQ:
   5633		cmd = (1 << 16); /* no inc addr */
   5634		break;
   5635	default:
   5636		cmd = WR_CONFIRM;
   5637		break;
   5638	}
   5639	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5640	amdgpu_ring_write(ring, cmd);
   5641	amdgpu_ring_write(ring, reg);
   5642	amdgpu_ring_write(ring, 0);
   5643	amdgpu_ring_write(ring, val);
   5644}
   5645
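/*
 * A standalone sketch of the 5-dword WRITE_DATA packet emitted above,
 * assuming the usual PM4 type-3 header layout (type in bits 30-31,
 * opcode in bits 8-15, count in bits 16-29) and the WRITE_DATA opcode
 * 0x37; both are assumptions of this sketch rather than quotations of
 * the driver headers.
 */
#include <stdint.h>

#define PM4_TYPE3_SKETCH(op, count) \
	((3u << 30) | (((count) & 0x3fffu) << 16) | (((op) & 0xffu) << 8))

static void pack_write_data(uint32_t pkt[5], uint32_t cmd,
			    uint32_t reg, uint32_t val)
{
	pkt[0] = PM4_TYPE3_SKETCH(0x37, 3);	/* PACKET3(WRITE_DATA, 3) */
	pkt[1] = cmd;	/* engine/dst select, WR_CONFIRM, addr-inc policy */
	pkt[2] = reg;	/* destination register offset (low) */
	pkt[3] = 0;	/* destination high dword */
	pkt[4] = val;	/* payload */
}
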
   5646static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
   5647					uint32_t val, uint32_t mask)
   5648{
   5649	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
   5650}
   5651
   5652static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
   5653						  uint32_t reg0, uint32_t reg1,
   5654						  uint32_t ref, uint32_t mask)
   5655{
   5656	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   5657	struct amdgpu_device *adev = ring->adev;
   5658	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
   5659		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
   5660
   5661	if (fw_version_ok)
   5662		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
   5663				      ref, mask, 0x20);
   5664	else
   5665		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
   5666							   ref, mask);
   5667}
   5668
   5669static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
   5670{
   5671	struct amdgpu_device *adev = ring->adev;
   5672	uint32_t value = 0;
   5673
   5674	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
   5675	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
   5676	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
   5677	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
   5678	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
   5679}
   5680
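/*
 * A generic mask/shift sketch (placeholder field positions, not the
 * real SQ_CMD layout from the sh_mask headers) of what REG_SET_FIELD
 * does in the soft-recovery path above: clear the field, then OR in
 * the new value at the field's shift.
 */
#include <stdint.h>

static uint32_t set_field(uint32_t reg, uint32_t mask, uint32_t shift,
			  uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}
/* e.g. set_field(0, 0x7, 0, 0x3) packs CMD = 0x3 into a 3-bit field */
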
   5681static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
   5682						 enum amdgpu_interrupt_state state)
   5683{
   5684	switch (state) {
   5685	case AMDGPU_IRQ_STATE_DISABLE:
   5686	case AMDGPU_IRQ_STATE_ENABLE:
   5687		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
   5688			       TIME_STAMP_INT_ENABLE,
   5689			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
   5690		break;
   5691	default:
   5692		break;
   5693	}
   5694}
   5695
   5696static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
   5697						     int me, int pipe,
   5698						     enum amdgpu_interrupt_state state)
   5699{
   5700	u32 mec_int_cntl, mec_int_cntl_reg;
   5701
   5702	/*
   5703	 * amdgpu controls only the first MEC. That's why this function only
   5704	 * handles the setting of interrupts for this specific MEC. All other
   5705	 * pipes' interrupts are set by amdkfd.
   5706	 */
   5707
   5708	if (me == 1) {
   5709		switch (pipe) {
   5710		case 0:
   5711			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
   5712			break;
   5713		case 1:
   5714			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
   5715			break;
   5716		case 2:
   5717			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
   5718			break;
   5719		case 3:
   5720			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
   5721			break;
   5722		default:
   5723			DRM_DEBUG("invalid pipe %d\n", pipe);
   5724			return;
   5725		}
   5726	} else {
   5727		DRM_DEBUG("invalid me %d\n", me);
   5728		return;
   5729	}
   5730
   5731	switch (state) {
   5732	case AMDGPU_IRQ_STATE_DISABLE:
   5733		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
   5734		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
   5735					     TIME_STAMP_INT_ENABLE, 0);
   5736		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
   5737		break;
   5738	case AMDGPU_IRQ_STATE_ENABLE:
   5739		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
   5740		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
   5741					     TIME_STAMP_INT_ENABLE, 1);
   5742		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
   5743		break;
   5744	default:
   5745		break;
   5746	}
   5747}
   5748
   5749static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
   5750					     struct amdgpu_irq_src *source,
   5751					     unsigned type,
   5752					     enum amdgpu_interrupt_state state)
   5753{
   5754	switch (state) {
   5755	case AMDGPU_IRQ_STATE_DISABLE:
   5756	case AMDGPU_IRQ_STATE_ENABLE:
   5757		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
   5758			       PRIV_REG_INT_ENABLE,
   5759			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
   5760		break;
   5761	default:
   5762		break;
   5763	}
   5764
   5765	return 0;
   5766}
   5767
   5768static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
   5769					      struct amdgpu_irq_src *source,
   5770					      unsigned type,
   5771					      enum amdgpu_interrupt_state state)
   5772{
   5773	switch (state) {
   5774	case AMDGPU_IRQ_STATE_DISABLE:
   5775	case AMDGPU_IRQ_STATE_ENABLE:
   5776		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
   5777			       PRIV_INSTR_INT_ENABLE,
   5778			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
   5779		break;
   5780	default:
   5781		break;
   5782	}
   5783
   5784	return 0;
   5785}
   5786
   5787#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
   5788	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
   5789			CP_ECC_ERROR_INT_ENABLE, 1)
   5790
   5791#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
   5792	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
   5793			CP_ECC_ERROR_INT_ENABLE, 0)
   5794
   5795static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
   5796					      struct amdgpu_irq_src *source,
   5797					      unsigned type,
   5798					      enum amdgpu_interrupt_state state)
   5799{
   5800	switch (state) {
   5801	case AMDGPU_IRQ_STATE_DISABLE:
   5802		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
   5803				CP_ECC_ERROR_INT_ENABLE, 0);
   5804		DISABLE_ECC_ON_ME_PIPE(1, 0);
   5805		DISABLE_ECC_ON_ME_PIPE(1, 1);
   5806		DISABLE_ECC_ON_ME_PIPE(1, 2);
   5807		DISABLE_ECC_ON_ME_PIPE(1, 3);
   5808		break;
   5809
   5810	case AMDGPU_IRQ_STATE_ENABLE:
   5811		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
   5812				CP_ECC_ERROR_INT_ENABLE, 1);
   5813		ENABLE_ECC_ON_ME_PIPE(1, 0);
   5814		ENABLE_ECC_ON_ME_PIPE(1, 1);
   5815		ENABLE_ECC_ON_ME_PIPE(1, 2);
   5816		ENABLE_ECC_ON_ME_PIPE(1, 3);
   5817		break;
   5818	default:
   5819		break;
   5820	}
   5821
   5822	return 0;
   5823}
   5824
   5826static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
   5827					    struct amdgpu_irq_src *src,
   5828					    unsigned type,
   5829					    enum amdgpu_interrupt_state state)
   5830{
   5831	switch (type) {
   5832	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
   5833		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
   5834		break;
   5835	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
   5836		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
   5837		break;
   5838	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
   5839		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
   5840		break;
   5841	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
   5842		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
   5843		break;
   5844	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
   5845		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
   5846		break;
   5847	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
   5848		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
   5849		break;
   5850	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
   5851		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
   5852		break;
   5853	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
   5854		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
   5855		break;
   5856	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
   5857		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
   5858		break;
   5859	default:
   5860		break;
   5861	}
   5862	return 0;
   5863}
   5864
   5865static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
   5866			    struct amdgpu_irq_src *source,
   5867			    struct amdgpu_iv_entry *entry)
   5868{
   5869	int i;
   5870	u8 me_id, pipe_id, queue_id;
   5871	struct amdgpu_ring *ring;
   5872
   5873	DRM_DEBUG("IH: CP EOP\n");
   5874	me_id = (entry->ring_id & 0x0c) >> 2;
   5875	pipe_id = (entry->ring_id & 0x03) >> 0;
   5876	queue_id = (entry->ring_id & 0x70) >> 4;
   5877
   5878	switch (me_id) {
   5879	case 0:
   5880		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
   5881		break;
   5882	case 1:
   5883	case 2:
   5884		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   5885			ring = &adev->gfx.compute_ring[i];
   5886			/* Per-queue interrupt is supported for MEC starting from VI.
   5887			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
   5888			 */
   5889			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
   5890				amdgpu_fence_process(ring);
   5891		}
   5892		break;
   5893	}
   5894	return 0;
   5895}
   5896
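/*
 * A standalone decode (not driver code) of the ring_id bitfields used
 * by the interrupt handlers above: pipe in bits 0-1, ME in bits 2-3
 * and queue in bits 4-6 of the IV entry's ring_id.
 */
#include <stdio.h>

static void decode_ring_id(unsigned int ring_id)
{
	unsigned int me    = (ring_id & 0x0c) >> 2;
	unsigned int pipe  = (ring_id & 0x03) >> 0;
	unsigned int queue = (ring_id & 0x70) >> 4;

	printf("me %u pipe %u queue %u\n", me, pipe, queue);
}
/* decode_ring_id(0x25) prints "me 1 pipe 1 queue 2" */
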
   5897static void gfx_v9_0_fault(struct amdgpu_device *adev,
   5898			   struct amdgpu_iv_entry *entry)
   5899{
   5900	u8 me_id, pipe_id, queue_id;
   5901	struct amdgpu_ring *ring;
   5902	int i;
   5903
   5904	me_id = (entry->ring_id & 0x0c) >> 2;
   5905	pipe_id = (entry->ring_id & 0x03) >> 0;
   5906	queue_id = (entry->ring_id & 0x70) >> 4;
   5907
   5908	switch (me_id) {
   5909	case 0:
   5910		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
   5911		break;
   5912	case 1:
   5913	case 2:
   5914		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   5915			ring = &adev->gfx.compute_ring[i];
   5916			if (ring->me == me_id && ring->pipe == pipe_id &&
   5917			    ring->queue == queue_id)
   5918				drm_sched_fault(&ring->sched);
   5919		}
   5920		break;
   5921	}
   5922}
   5923
   5924static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
   5925				 struct amdgpu_irq_src *source,
   5926				 struct amdgpu_iv_entry *entry)
   5927{
   5928	DRM_ERROR("Illegal register access in command stream\n");
   5929	gfx_v9_0_fault(adev, entry);
   5930	return 0;
   5931}
   5932
   5933static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
   5934				  struct amdgpu_irq_src *source,
   5935				  struct amdgpu_iv_entry *entry)
   5936{
   5937	DRM_ERROR("Illegal instruction in command stream\n");
   5938	gfx_v9_0_fault(adev, entry);
   5939	return 0;
   5940}
   5941
   5943static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
   5944	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
   5945	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
   5946	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
   5947	},
   5948	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
   5949	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
   5950	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
   5951	},
   5952	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
   5953	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
   5954	  0, 0
   5955	},
   5956	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
   5957	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
   5958	  0, 0
   5959	},
   5960	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
   5961	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
   5962	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
   5963	},
   5964	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
   5965	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
   5966	  0, 0
   5967	},
   5968	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
   5969	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
   5970	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
   5971	},
   5972	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
   5973	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
   5974	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
   5975	},
   5976	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
   5977	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
   5978	  0, 0
   5979	},
   5980	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
   5981	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
   5982	  0, 0
   5983	},
   5984	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
   5985	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
   5986	  0, 0
   5987	},
   5988	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
   5989	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
   5990	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
   5991	},
   5992	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
   5993	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
   5994	  0, 0
   5995	},
   5996	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
   5997	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
   5998	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
   5999	},
   6000	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
   6001	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
   6002	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
   6003	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
   6004	},
   6005	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
   6006	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
   6007	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
   6008	  0, 0
   6009	},
   6010	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
   6011	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
   6012	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
   6013	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
   6014	},
   6015	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
   6016	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
   6017	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
   6018	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
   6019	},
   6020	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
   6021	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
   6022	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
   6023	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
   6024	},
   6025	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
   6026	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
   6027	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
   6028	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
   6029	},
   6030	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
   6031	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
   6032	  0, 0
   6033	},
   6034	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
   6035	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
   6036	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
   6037	},
   6038	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
   6039	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
   6040	  0, 0
   6041	},
   6042	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
   6043	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
   6044	  0, 0
   6045	},
   6046	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
   6047	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
   6048	  0, 0
   6049	},
   6050	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
   6051	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
   6052	  0, 0
   6053	},
   6054	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
   6055	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
   6056	  0, 0
   6057	},
   6058	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
   6059	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
   6060	  0, 0
   6061	},
   6062	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6063	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
   6064	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
   6065	},
   6066	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6067	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
   6068	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
   6069	},
   6070	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6071	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
   6072	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
   6073	},
   6074	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6075	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
   6076	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
   6077	},
   6078	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6079	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
   6080	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
   6081	},
   6082	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6083	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
   6084	  0, 0
   6085	},
   6086	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6087	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
   6088	  0, 0
   6089	},
   6090	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6091	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
   6092	  0, 0
   6093	},
   6094	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6095	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
   6096	  0, 0
   6097	},
   6098	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6099	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
   6100	  0, 0
   6101	},
   6102	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
   6103	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
   6104	  0, 0
   6105	},
   6106	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6107	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
   6108	  0, 0
   6109	},
   6110	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6111	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
   6112	  0, 0
   6113	},
   6114	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6115	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
   6116	  0, 0
   6117	},
   6118	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6119	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
   6120	  0, 0
   6121	},
   6122	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6123	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
   6124	  0, 0
   6125	},
   6126	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6127	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
   6128	  0, 0
   6129	},
   6130	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
   6131	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
   6132	  0, 0
   6133	},
   6134	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
   6135	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
   6136	  0, 0
   6137	},
   6138	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6139	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
   6140	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
   6141	},
   6142	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6143	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
   6144	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
   6145	},
   6146	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6147	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
   6148	  0, 0
   6149	},
   6150	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6151	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
   6152	  0, 0
   6153	},
   6154	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6155	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
   6156	  0, 0
   6157	},
   6158	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6159	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
   6160	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
   6161	},
   6162	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
   6163	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
   6164	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
   6165	},
   6166	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
   6167	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
   6168	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
   6169	},
   6170	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
   6171	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
   6172	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
   6173	},
   6174	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
   6175	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
   6176	  0, 0
   6177	},
   6178	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6179	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
   6180	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
   6181	},
   6182	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6183	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
   6184	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
   6185	},
   6186	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6187	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
   6188	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
   6189	},
   6190	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6191	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
   6192	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
   6193	},
   6194	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6195	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
   6196	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
   6197	},
   6198	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6199	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
   6200	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
   6201	},
   6202	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
   6203	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
   6204	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
   6205	},
   6206	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
   6207	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
   6208	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
   6209	},
   6210	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
   6211	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
   6212	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
   6213	},
   6214	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
   6215	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
   6216	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
   6217	},
   6218	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
   6219	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
   6220	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
   6221	},
   6222	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
   6223	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
   6224	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
   6225	},
   6226	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
   6227	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
   6228	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
   6229	},
   6230	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6231	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
   6232	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
   6233	},
   6234	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6235	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
   6236	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
   6237	},
   6238	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6239	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
   6240	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
   6241	},
   6242	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6243	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
   6244	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
   6245	},
   6246	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6247	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
   6248	  0, 0
   6249	},
   6250	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6251	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
   6252	  0, 0
   6253	},
   6254	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6255	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
   6256	  0, 0
   6257	},
   6258	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6259	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
   6260	  0, 0
   6261	},
   6262	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6263	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
   6264	  0, 0
   6265	},
   6266	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
   6267	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
   6268	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
   6269	},
   6270	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6271	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
   6272	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
   6273	},
   6274	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6275	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
   6276	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
   6277	},
   6278	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6279	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
   6280	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
   6281	},
   6282	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6283	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
   6284	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
   6285	},
   6286	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6287	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
   6288	  0, 0
   6289	},
   6290	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6291	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
   6292	  0, 0
   6293	},
   6294	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6295	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
   6296	  0, 0
   6297	},
   6298	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6299	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
   6300	  0, 0
   6301	},
   6302	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
   6303	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
   6304	  0, 0
   6305	},
   6306	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6307	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
   6308	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
   6309	},
   6310	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6311	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
   6312	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
   6313	},
   6314	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6315	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
   6316	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
   6317	},
   6318	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6319	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
   6320	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
   6321	},
   6322	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6323	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
   6324	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
   6325	},
   6326	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6327	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
   6328	  0, 0
   6329	},
   6330	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6331	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
   6332	  0, 0
   6333	},
   6334	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6335	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
   6336	  0, 0
   6337	},
   6338	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6339	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
   6340	  0, 0
   6341	},
   6342	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
   6343	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
   6344	  0, 0
   6345	},
   6346	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6347	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
   6348	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
   6349	},
   6350	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6351	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
   6352	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
   6353	},
   6354	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6355	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
   6356	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
   6357	},
   6358	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6359	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
   6360	  0, 0
   6361	},
   6362	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6363	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
   6364	  0, 0
   6365	},
   6366	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6367	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
   6368	  0, 0
   6369	},
   6370	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6371	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
   6372	  0, 0
   6373	},
   6374	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6375	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
   6376	  0, 0
   6377	},
   6378	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
   6379	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
   6380	  0, 0
   6381	}
   6382};
   6383
   6384static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
   6385				     void *inject_if)
   6386{
   6387	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
   6388	int ret;
   6389	struct ta_ras_trigger_error_input block_info = { 0 };
   6390
   6391	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
   6392		return -EINVAL;
   6393
   6394	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
   6395		return -EINVAL;
   6396
   6397	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
   6398		return -EPERM;
   6399
   6400	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
   6401	      info->head.type)) {
   6402		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
   6403			ras_gfx_subblocks[info->head.sub_block_index].name,
   6404			info->head.type);
   6405		return -EPERM;
   6406	}
   6407
   6408	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
   6409	      info->head.type)) {
   6410		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
   6411			ras_gfx_subblocks[info->head.sub_block_index].name,
   6412			info->head.type);
   6413		return -EPERM;
   6414	}
   6415
   6416	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
   6417	block_info.sub_block_index =
   6418		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
   6419	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
   6420	block_info.address = info->address;
   6421	block_info.value = info->value;
   6422
   6423	mutex_lock(&adev->grbm_idx_mutex);
   6424	ret = psp_ras_trigger_error(&adev->psp, &block_info);
   6425	mutex_unlock(&adev->grbm_idx_mutex);
   6426
   6427	return ret;
   6428}
   6429
   6430static const char *vml2_mems[] = {
   6431	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
   6432	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
   6433	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
   6434	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
   6435	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
   6436	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
   6437	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
   6438	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
   6439	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
   6440	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
   6441	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
   6442	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
   6443	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
   6444	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
   6445	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
   6446	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
   6447};
   6448
   6449static const char *vml2_walker_mems[] = {
   6450	"UTC_VML2_CACHE_PDE0_MEM0",
   6451	"UTC_VML2_CACHE_PDE0_MEM1",
   6452	"UTC_VML2_CACHE_PDE1_MEM0",
   6453	"UTC_VML2_CACHE_PDE1_MEM1",
   6454	"UTC_VML2_CACHE_PDE2_MEM0",
   6455	"UTC_VML2_CACHE_PDE2_MEM1",
   6456	"UTC_VML2_RDIF_LOG_FIFO",
   6457};
   6458
   6459static const char *atc_l2_cache_2m_mems[] = {
   6460	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
   6461	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
   6462	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
   6463	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
   6464};
   6465
   6466static const char *atc_l2_cache_4k_mems[] = {
   6467	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
   6468	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
   6469	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
   6470	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
   6471	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
   6472	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
   6473	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
   6474	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
   6475	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
   6476	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
   6477	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
   6478	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
   6479	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
   6480	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
   6481	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
   6482	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
   6483	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
   6484	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
   6485	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
   6486	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
   6487	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
   6488	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
   6489	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
   6490	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
   6491	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
   6492	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
   6493	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
   6494	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
   6495	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
   6496	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
   6497	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
   6498	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
   6499};
   6500
   6501static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
   6502					 struct ras_err_data *err_data)
   6503{
   6504	uint32_t i, data;
   6505	uint32_t sec_count, ded_count;
   6506
   6507	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
   6508	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
   6509	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
   6510	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
   6511	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
   6512	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
   6513	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
   6514	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
   6515
   6516	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
   6517		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
   6518		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
   6519
   6520		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
   6521		if (sec_count) {
   6522			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6523				"SEC %d\n", i, vml2_mems[i], sec_count);
   6524			err_data->ce_count += sec_count;
   6525		}
   6526
   6527		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
   6528		if (ded_count) {
   6529			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6530				"DED %d\n", i, vml2_mems[i], ded_count);
   6531			err_data->ue_count += ded_count;
   6532		}
   6533	}
   6534
   6535	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
   6536		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
   6537		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
   6538
   6539		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
   6540						SEC_COUNT);
   6541		if (sec_count) {
   6542			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6543				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
   6544			err_data->ce_count += sec_count;
   6545		}
   6546
   6547		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
   6548						DED_COUNT);
   6549		if (ded_count) {
   6550			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6551				"DED %d\n", i, vml2_walker_mems[i], ded_count);
   6552			err_data->ue_count += ded_count;
   6553		}
   6554	}
   6555
   6556	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
   6557		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
   6558		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
   6559
   6560		sec_count = (data & 0x00006000L) >> 0xd;
   6561		if (sec_count) {
   6562			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6563				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
   6564				sec_count);
   6565			err_data->ce_count += sec_count;
   6566		}
   6567	}
   6568
   6569	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
   6570		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
   6571		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
   6572
   6573		sec_count = (data & 0x00006000L) >> 0xd;
   6574		if (sec_count) {
   6575			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6576				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
   6577				sec_count);
   6578			err_data->ce_count += sec_count;
   6579		}
   6580
   6581		ded_count = (data & 0x00018000L) >> 0xf;
   6582		if (ded_count) {
   6583			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
   6584				"DED %d\n", i, atc_l2_cache_4k_mems[i],
   6585				ded_count);
   6586			err_data->ue_count += ded_count;
   6587		}
   6588	}
   6589
   6590	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
   6591	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
   6592	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
   6593	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
   6594
   6595	return 0;
   6596}
   6597
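/*
 * A minimal standalone decode (not driver code) of the hand-coded EDC
 * counter masks used above for the ATC L2 registers: SEC in bits
 * 13-14 (mask 0x00006000) and DED in bits 15-16 (mask 0x00018000).
 */
#include <stdint.h>

static void decode_atc_l2_edc(uint32_t data, uint32_t *sec, uint32_t *ded)
{
	*sec = (data & 0x00006000u) >> 13;	/* single-error corrected */
	*ded = (data & 0x00018000u) >> 15;	/* double-error detected */
}
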
   6598static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
   6599	const struct soc15_reg_entry *reg,
   6600	uint32_t se_id, uint32_t inst_id, uint32_t value,
   6601	uint32_t *sec_count, uint32_t *ded_count)
   6602{
   6603	uint32_t i;
   6604	uint32_t sec_cnt, ded_cnt;
   6605
   6606	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
   6607		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
   6608			gfx_v9_0_ras_fields[i].seg != reg->seg ||
   6609			gfx_v9_0_ras_fields[i].inst != reg->inst)
   6610			continue;
   6611
   6612		sec_cnt = (value &
   6613				gfx_v9_0_ras_fields[i].sec_count_mask) >>
   6614				gfx_v9_0_ras_fields[i].sec_count_shift;
   6615		if (sec_cnt) {
   6616			dev_info(adev->dev, "GFX SubBlock %s, "
   6617				"Instance[%d][%d], SEC %d\n",
   6618				gfx_v9_0_ras_fields[i].name,
   6619				se_id, inst_id,
   6620				sec_cnt);
   6621			*sec_count += sec_cnt;
   6622		}
   6623
   6624		ded_cnt = (value &
   6625				gfx_v9_0_ras_fields[i].ded_count_mask) >>
   6626				gfx_v9_0_ras_fields[i].ded_count_shift;
   6627		if (ded_cnt) {
   6628			dev_info(adev->dev, "GFX SubBlock %s, "
   6629				"Instance[%d][%d], DED %d\n",
   6630				gfx_v9_0_ras_fields[i].name,
   6631				se_id, inst_id,
   6632				ded_cnt);
   6633			*ded_count += ded_cnt;
   6634		}
   6635	}
   6636
   6637	return 0;
   6638}
   6639
   6640static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
   6641{
   6642	int i, j, k;
   6643
   6644	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
   6645		return;
   6646
   6647	/* read back registers to clear the counters */
   6648	mutex_lock(&adev->grbm_idx_mutex);
   6649	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
   6650		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
   6651			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
   6652				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
   6653				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
   6654			}
   6655		}
   6656	}
   6657	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
   6658	mutex_unlock(&adev->grbm_idx_mutex);
   6659
   6660	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
   6661	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
   6662	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
   6663	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
   6664	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
   6665	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
   6666	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
   6667	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
   6668
   6669	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
   6670		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
   6671		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
   6672	}
   6673
   6674	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
   6675		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
   6676		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
   6677	}
   6678
   6679	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
   6680		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
   6681		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
   6682	}
   6683
   6684	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
   6685		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
   6686		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
   6687	}
   6688
   6689	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
   6690	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
   6691	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
   6692	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
   6693}
   6694
   6695static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
   6696					  void *ras_error_status)
   6697{
   6698	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
   6699	uint32_t sec_count = 0, ded_count = 0;
   6700	uint32_t i, j, k;
   6701	uint32_t reg_value;
   6702
   6703	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
   6704		return;
   6705
   6706	err_data->ue_count = 0;
   6707	err_data->ce_count = 0;
   6708
   6709	mutex_lock(&adev->grbm_idx_mutex);
   6710
   6711	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
   6712		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
   6713			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
   6714				gfx_v9_0_select_se_sh(adev, j, 0, k);
   6715				reg_value =
   6716					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
   6717				if (reg_value)
   6718					gfx_v9_0_ras_error_count(adev,
   6719						&gfx_v9_0_edc_counter_regs[i],
   6720						j, k, reg_value,
   6721						&sec_count, &ded_count);
   6722			}
   6723		}
   6724	}
   6725
   6726	err_data->ce_count += sec_count;
   6727	err_data->ue_count += ded_count;
   6728
   6729	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   6730	mutex_unlock(&adev->grbm_idx_mutex);
   6731
   6732	gfx_v9_0_query_utc_edc_status(adev, err_data);
   6733}
   6734
   6735static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
   6736{
   6737	const unsigned int cp_coher_cntl =
   6738			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
   6739			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
   6740			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
   6741			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
   6742			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
   6743
   6744	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
   6745	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
   6746	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
   6747	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
   6748	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
   6749	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
   6750	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
   6751	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
   6752}
   6753
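/*
 * A standalone sketch (assumed PM4 header layout and ACQUIRE_MEM
 * opcode 0x58; both are assumptions, not quotations of the driver
 * headers) of the full-range cache flush packet emitted above: an
 * all-ones CP_COHER_SIZE with a zero CP_COHER_BASE covers the whole
 * address space.
 */
#include <stdint.h>

static void pack_acquire_mem(uint32_t pkt[7], uint32_t cp_coher_cntl)
{
	pkt[0] = (3u << 30) | (5u << 16) | (0x58u << 8);	/* PACKET3(ACQUIRE_MEM, 5) */
	pkt[1] = cp_coher_cntl;	/* which caches to flush/invalidate */
	pkt[2] = 0xffffffff;	/* CP_COHER_SIZE */
	pkt[3] = 0xffffff;	/* CP_COHER_SIZE_HI */
	pkt[4] = 0;		/* CP_COHER_BASE */
	pkt[5] = 0;		/* CP_COHER_BASE_HI */
	pkt[6] = 0x0000000A;	/* POLL_INTERVAL */
}
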
   6754static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
   6755					uint32_t pipe, bool enable)
   6756{
   6757	struct amdgpu_device *adev = ring->adev;
   6758	uint32_t val;
   6759	uint32_t wcl_cs_reg;
   6760
   6761	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
   6762	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
   6763
   6764	switch (pipe) {
   6765	case 0:
   6766		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
   6767		break;
   6768	case 1:
   6769		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
   6770		break;
   6771	case 2:
   6772		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
   6773		break;
   6774	case 3:
   6775		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
   6776		break;
   6777	default:
   6778		DRM_DEBUG("invalid pipe %d\n", pipe);
   6779		return;
   6780	}
   6781
   6782	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
   6783}
   6784
   6785static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
   6786{
   6787	struct amdgpu_device *adev = ring->adev;
   6788	uint32_t val;
   6789	int i;
   6790
   6791	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
   6792	 * limit the number of gfx waves. Setting it to 0x1f (five bits set)
   6793	 * makes sure gfx only gets around 25% of gpu resources (0x1f out of
   6794	 * the 0x7f full scale).
   6795	 */
   6796	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
   6797	amdgpu_ring_emit_wreg(ring,
   6798			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
   6799			      val);
   6800
   6801	/* Restrict waves for normal/low priority compute queues as well
   6802	 * to get best QoS for high priority compute jobs.
   6803	 *
   6804	 * amdgpu controls only the 1st ME (CS pipes 0-3).
   6805	 */
   6806	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
   6807		if (i != ring->pipe)
   6808			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
   6809	}
   6811}
   6812
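/*
 * A quick check (plain arithmetic, not driver code) of the "around
 * 25%" claim in the comment above: 0x1f out of a 7-bit full scale.
 */
#include <stdio.h>

int main(void)
{
	printf("%.1f%%\n", 100.0 * 0x1f / 0x7f);	/* prints 24.4% */
	return 0;
}
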
   6813static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
   6814	.name = "gfx_v9_0",
   6815	.early_init = gfx_v9_0_early_init,
   6816	.late_init = gfx_v9_0_late_init,
   6817	.sw_init = gfx_v9_0_sw_init,
   6818	.sw_fini = gfx_v9_0_sw_fini,
   6819	.hw_init = gfx_v9_0_hw_init,
   6820	.hw_fini = gfx_v9_0_hw_fini,
   6821	.suspend = gfx_v9_0_suspend,
   6822	.resume = gfx_v9_0_resume,
   6823	.is_idle = gfx_v9_0_is_idle,
   6824	.wait_for_idle = gfx_v9_0_wait_for_idle,
   6825	.soft_reset = gfx_v9_0_soft_reset,
   6826	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
   6827	.set_powergating_state = gfx_v9_0_set_powergating_state,
   6828	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
   6829};
   6830
   6831static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
   6832	.type = AMDGPU_RING_TYPE_GFX,
   6833	.align_mask = 0xff,
   6834	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6835	.support_64bit_ptrs = true,
   6836	.secure_submission_supported = true,
   6837	.vmhub = AMDGPU_GFXHUB_0,
   6838	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
   6839	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
   6840	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
   6841	.emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
   6842		5 +  /* COND_EXEC */
   6843		7 +  /* PIPELINE_SYNC */
   6844		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
   6845		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
   6846		2 + /* VM_FLUSH */
   6847		8 +  /* FENCE for VM_FLUSH */
   6848		20 + /* GDS switch */
   6849		4 + /* double SWITCH_BUFFER;
   6850		       the first COND_EXEC jumps to the place just
   6851		       prior to this double SWITCH_BUFFER */
   6852		5 + /* COND_EXEC */
   6853		7 +	 /*	HDP_flush */
   6854		4 +	 /*	VGT_flush */
   6855		14 + /*	CE_META */
   6856		31 + /*	DE_META */
   6857		3 + /* CNTX_CTRL */
   6858		5 + /* HDP_INVL */
   6859		8 + 8 + /* FENCE x2 */
   6860		2 + /* SWITCH_BUFFER */
   6861		7, /* gfx_v9_0_emit_mem_sync */
   6862	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
   6863	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
   6864	.emit_fence = gfx_v9_0_ring_emit_fence,
   6865	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
   6866	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
   6867	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
   6868	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
   6869	.test_ring = gfx_v9_0_ring_test_ring,
   6870	.test_ib = gfx_v9_0_ring_test_ib,
   6871	.insert_nop = amdgpu_ring_insert_nop,
   6872	.pad_ib = amdgpu_ring_generic_pad_ib,
   6873	.emit_switch_buffer = gfx_v9_ring_emit_sb,
   6874	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
   6875	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
   6876	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
   6877	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
   6878	.emit_wreg = gfx_v9_0_ring_emit_wreg,
   6879	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
   6880	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
   6881	.soft_recovery = gfx_v9_0_ring_soft_recovery,
   6882	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
   6883};
   6884
   6885static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
   6886	.type = AMDGPU_RING_TYPE_COMPUTE,
   6887	.align_mask = 0xff,
   6888	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6889	.support_64bit_ptrs = true,
   6890	.vmhub = AMDGPU_GFXHUB_0,
   6891	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
   6892	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
   6893	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
   6894	.emit_frame_size =
   6895		20 + /* gfx_v9_0_ring_emit_gds_switch */
   6896		7 + /* gfx_v9_0_ring_emit_hdp_flush */
   6897		5 + /* hdp invalidate */
   6898		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
   6899		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
   6900		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
   6901		2 + /* gfx_v9_0_ring_emit_vm_flush */
   6902		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
   6903		7 + /* gfx_v9_0_emit_mem_sync */
    6904		5 + /* gfx_v9_0_emit_wave_limit for updating the mmSPI_WCL_PIPE_PERCENT_GFX register */
    6905		15, /* for updating the 3 mmSPI_WCL_PIPE_PERCENT_CS registers of the other pipes */
   6906	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
   6907	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
   6908	.emit_fence = gfx_v9_0_ring_emit_fence,
   6909	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
   6910	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
   6911	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
   6912	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
   6913	.test_ring = gfx_v9_0_ring_test_ring,
   6914	.test_ib = gfx_v9_0_ring_test_ib,
   6915	.insert_nop = amdgpu_ring_insert_nop,
   6916	.pad_ib = amdgpu_ring_generic_pad_ib,
   6917	.emit_wreg = gfx_v9_0_ring_emit_wreg,
   6918	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
   6919	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
   6920	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
   6921	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
   6922};
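
/* Editor's sketch (names below are illustrative): where the 5 + 15 at the
 * end of emit_frame_size above come from. gfx_v9_0_ring_emit_wreg() emits
 * one 5-dword PACKET3_WRITE_DATA, and gfx_v9_0_emit_wave_limit() issues one
 * such write for mmSPI_WCL_PIPE_PERCENT_GFX plus one per CS pipe other than
 * the ring's own (3 of the 4 pipes).
 */
enum {
	WCL_WREG_DWORDS		= 5,			/* one PACKET3_WRITE_DATA */
	WCL_GFX_DWORDS		= WCL_WREG_DWORDS,	/* = 5  */
	WCL_CS_DWORDS		= 3 * WCL_WREG_DWORDS,	/* = 15 */
};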
   6923
   6924static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
   6925	.type = AMDGPU_RING_TYPE_KIQ,
   6926	.align_mask = 0xff,
   6927	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6928	.support_64bit_ptrs = true,
   6929	.vmhub = AMDGPU_GFXHUB_0,
   6930	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
   6931	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
   6932	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
   6933	.emit_frame_size =
   6934		20 + /* gfx_v9_0_ring_emit_gds_switch */
   6935		7 + /* gfx_v9_0_ring_emit_hdp_flush */
   6936		5 + /* hdp invalidate */
   6937		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
   6938		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
   6939		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
   6940		2 + /* gfx_v9_0_ring_emit_vm_flush */
   6941		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
   6942	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
   6943	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
   6944	.test_ring = gfx_v9_0_ring_test_ring,
   6945	.insert_nop = amdgpu_ring_insert_nop,
   6946	.pad_ib = amdgpu_ring_generic_pad_ib,
   6947	.emit_rreg = gfx_v9_0_ring_emit_rreg,
   6948	.emit_wreg = gfx_v9_0_ring_emit_wreg,
   6949	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
   6950	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
   6951};
   6952
   6953static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
   6954{
   6955	int i;
   6956
   6957	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
   6958
   6959	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   6960		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
   6961
   6962	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   6963		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
   6964}
   6965
   6966static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
   6967	.set = gfx_v9_0_set_eop_interrupt_state,
   6968	.process = gfx_v9_0_eop_irq,
   6969};
   6970
   6971static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
   6972	.set = gfx_v9_0_set_priv_reg_fault_state,
   6973	.process = gfx_v9_0_priv_reg_irq,
   6974};
   6975
   6976static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
   6977	.set = gfx_v9_0_set_priv_inst_fault_state,
   6978	.process = gfx_v9_0_priv_inst_irq,
   6979};
   6980
   6981static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
   6982	.set = gfx_v9_0_set_cp_ecc_error_state,
   6983	.process = amdgpu_gfx_cp_ecc_error_irq,
   6984};
    6985
   6987static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
   6988{
   6989	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
   6990	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
   6991
   6992	adev->gfx.priv_reg_irq.num_types = 1;
   6993	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
   6994
   6995	adev->gfx.priv_inst_irq.num_types = 1;
   6996	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
   6997
    6998	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
   6999	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
   7000}
   7001
   7002static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
   7003{
   7004	switch (adev->ip_versions[GC_HWIP][0]) {
   7005	case IP_VERSION(9, 0, 1):
   7006	case IP_VERSION(9, 2, 1):
   7007	case IP_VERSION(9, 4, 0):
   7008	case IP_VERSION(9, 2, 2):
   7009	case IP_VERSION(9, 1, 0):
   7010	case IP_VERSION(9, 4, 1):
   7011	case IP_VERSION(9, 3, 0):
   7012	case IP_VERSION(9, 4, 2):
   7013		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
   7014		break;
   7015	default:
   7016		break;
   7017	}
   7018}
   7019
   7020static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
   7021{
    7022	/* init ASIC GDS info */
   7023	switch (adev->ip_versions[GC_HWIP][0]) {
   7024	case IP_VERSION(9, 0, 1):
   7025	case IP_VERSION(9, 2, 1):
   7026	case IP_VERSION(9, 4, 0):
   7027		adev->gds.gds_size = 0x10000;
   7028		break;
   7029	case IP_VERSION(9, 2, 2):
   7030	case IP_VERSION(9, 1, 0):
   7031	case IP_VERSION(9, 4, 1):
   7032		adev->gds.gds_size = 0x1000;
   7033		break;
   7034	case IP_VERSION(9, 4, 2):
    7035		/* Aldebaran removed all of the internal GDS memory;
    7036		 * the kernel only supports GWS opcodes such as
    7037		 * barrier and semaphore. */
   7038		adev->gds.gds_size = 0;
   7039		break;
   7040	default:
   7041		adev->gds.gds_size = 0x10000;
   7042		break;
   7043	}
   7044
   7045	switch (adev->ip_versions[GC_HWIP][0]) {
   7046	case IP_VERSION(9, 0, 1):
   7047	case IP_VERSION(9, 4, 0):
   7048		adev->gds.gds_compute_max_wave_id = 0x7ff;
   7049		break;
   7050	case IP_VERSION(9, 2, 1):
   7051		adev->gds.gds_compute_max_wave_id = 0x27f;
   7052		break;
   7053	case IP_VERSION(9, 2, 2):
   7054	case IP_VERSION(9, 1, 0):
   7055		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
   7056			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
   7057		else
   7058			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
   7059		break;
   7060	case IP_VERSION(9, 4, 1):
   7061		adev->gds.gds_compute_max_wave_id = 0xfff;
   7062		break;
   7063	case IP_VERSION(9, 4, 2):
    7064		/* GDS is deprecated on Aldebaran; no usage at all */
   7065		adev->gds.gds_compute_max_wave_id = 0;
   7066		break;
   7067	default:
   7068		/* this really depends on the chip */
   7069		adev->gds.gds_compute_max_wave_id = 0x7ff;
   7070		break;
   7071	}
   7072
   7073	adev->gds.gws_size = 64;
   7074	adev->gds.oa_size = 16;
   7075}
   7076
   7077static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
   7078						 u32 bitmap)
   7079{
   7080	u32 data;
   7081
   7082	if (!bitmap)
   7083		return;
   7084
   7085	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
   7086	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
   7087
   7088	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
   7089}
   7090
   7091static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
   7092{
   7093	u32 data, mask;
   7094
   7095	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
   7096	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
   7097
   7098	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
   7099	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
   7100
   7101	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
   7102
   7103	return (~data) & mask;
   7104}
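
/* Editor's sketch: a worked example of the active-CU computation above,
 * with made-up values. Assume max_cu_per_sh == 11 and that CUs 3 and 7 are
 * reported inactive (bits 0x88 after the shift); the active bitmap is the
 * complement restricted to the 11 valid CU bits.
 */
static inline u32 example_active_cu_bitmap(void)
{
	u32 inactive = 0x88;		/* CUs 3 and 7 fused off/disabled */
	u32 mask = (1U << 11) - 1;	/* what amdgpu_gfx_create_bitmask(11) yields */

	return (~inactive) & mask;	/* == 0x777: CUs 0-2, 4-6 and 8-10 */
}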
   7105
   7106static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
   7107				 struct amdgpu_cu_info *cu_info)
   7108{
   7109	int i, j, k, counter, active_cu_number = 0;
   7110	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
   7111	unsigned disable_masks[4 * 4];
   7112
   7113	if (!adev || !cu_info)
   7114		return -EINVAL;
   7115
   7116	/*
    7117	 * The 4*4 bitmap array holds 16 entries, which covers all gfx9 ASICs.
   7118	 */
   7119	if (adev->gfx.config.max_shader_engines *
   7120		adev->gfx.config.max_sh_per_se > 16)
   7121		return -EINVAL;
   7122
   7123	amdgpu_gfx_parse_disable_cu(disable_masks,
   7124				    adev->gfx.config.max_shader_engines,
   7125				    adev->gfx.config.max_sh_per_se);
   7126
   7127	mutex_lock(&adev->grbm_idx_mutex);
   7128	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   7129		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   7130			mask = 1;
   7131			ao_bitmap = 0;
   7132			counter = 0;
   7133			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
   7134			gfx_v9_0_set_user_cu_inactive_bitmap(
   7135				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
   7136			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
   7137
   7138			/*
    7139			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
    7140			 * a 4x4 array, which suits Vega ASICs with their 4*2
    7141			 * SE/SH layout.
    7142			 * For Arcturus, however, the SE/SH layout changed to 8*1.
    7143			 * To minimize the impact, we keep the current bitmap array
    7144			 * compatible as below:
   7145			 *    SE4,SH0 --> bitmap[0][1]
   7146			 *    SE5,SH0 --> bitmap[1][1]
   7147			 *    SE6,SH0 --> bitmap[2][1]
   7148			 *    SE7,SH0 --> bitmap[3][1]
   7149			 */
   7150			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
   7151
    7152			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
   7153				if (bitmap & mask) {
   7154					if (counter < adev->gfx.config.max_cu_per_sh)
   7155						ao_bitmap |= mask;
    7156					counter++;
   7157				}
   7158				mask <<= 1;
   7159			}
   7160			active_cu_number += counter;
   7161			if (i < 2 && j < 2)
   7162				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
   7163			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
   7164		}
   7165	}
   7166	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   7167	mutex_unlock(&adev->grbm_idx_mutex);
   7168
   7169	cu_info->number = active_cu_number;
   7170	cu_info->ao_cu_mask = ao_cu_mask;
   7171	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
   7172
   7173	return 0;
   7174}
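
/* Editor's sketch: the Arcturus (8*1 SE/SH) index remapping used above.
 * With j == 0, bitmap[i % 4][j + i / 4] folds SE4..SE7 into column 1,
 * e.g. SE0 -> [0][0], SE4 -> [0][1], SE7 -> [3][1], matching the table in
 * the comment. Purely illustrative.
 */
static inline void example_arcturus_remap(u32 out[4][4], const u32 se_bitmap[8])
{
	u32 i;

	for (i = 0; i < 8; i++)		/* 8 SEs, 1 SH each, so j == 0 */
		out[i % 4][i / 4] = se_bitmap[i];
}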
   7175
    7176	const struct amdgpu_ip_block_version gfx_v9_0_ip_block = {
   7178	.type = AMD_IP_BLOCK_TYPE_GFX,
   7179	.major = 9,
   7180	.minor = 0,
   7181	.rev = 0,
   7182	.funcs = &gfx_v9_0_ip_funcs,
   7183};