cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

gfx_v8_0.c (248729B)


/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
     23
     24#include <linux/delay.h>
     25#include <linux/kernel.h>
     26#include <linux/firmware.h>
     27#include <linux/module.h>
     28#include <linux/pci.h>
     29
     30#include "amdgpu.h"
     31#include "amdgpu_gfx.h"
     32#include "amdgpu_ring.h"
     33#include "vi.h"
     34#include "vi_structs.h"
     35#include "vid.h"
     36#include "amdgpu_ucode.h"
     37#include "amdgpu_atombios.h"
     38#include "atombios_i2c.h"
     39#include "clearstate_vi.h"
     40
     41#include "gmc/gmc_8_2_d.h"
     42#include "gmc/gmc_8_2_sh_mask.h"
     43
     44#include "oss/oss_3_0_d.h"
     45#include "oss/oss_3_0_sh_mask.h"
     46
     47#include "bif/bif_5_0_d.h"
     48#include "bif/bif_5_0_sh_mask.h"
     49#include "gca/gfx_8_0_d.h"
     50#include "gca/gfx_8_0_enum.h"
     51#include "gca/gfx_8_0_sh_mask.h"
     52
     53#include "dce/dce_10_0_d.h"
     54#include "dce/dce_10_0_sh_mask.h"
     55
     56#include "smu/smu_7_1_3_d.h"
     57
     58#include "ivsrcid/ivsrcid_vislands30.h"
     59
     60#define GFX8_NUM_GFX_RINGS     1
     61#define GFX8_MEC_HPD_SIZE 4096
     62
     63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
     64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
     65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
     66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
     67
     68#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
     69#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
     70#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
     71#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
     72#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
     73#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
     74#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
     75#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
     76#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
     77
     78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
     79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
     80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
     81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
     82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
     83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
     84
     85/* BPM SERDES CMD */
     86#define SET_BPM_SERDES_CMD    1
     87#define CLE_BPM_SERDES_CMD    0
     88
     89/* BPM Register Address*/
     90enum {
     91	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
     92	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
     93	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
     94	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
     95	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
     96	BPM_REG_FGCG_MAX
     97};
     98
     99#define RLC_FormatDirectRegListLength        14
    100
    101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
    102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
    103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
    104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
    105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
    106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
    107
    108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
    109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
    110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
    111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
    112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
    113
    114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
    115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
    116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
    117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
    118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
    119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
    120
    121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
    122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
    123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
    124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
    125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
    126
    127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
    128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
    129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
    130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
    131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
    132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
    133
    134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
    135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
    136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
    137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
    138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
    139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
    140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
    141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
    142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
    143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
    144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
    145
    146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
    147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
    148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
    149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
    150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
    151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
    152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
    153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
    154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
    155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
    156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
    157
    158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
    159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
    160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
    161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
    162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
    163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
    164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
    165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
    166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
    167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
    168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
    169
    170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
    171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
    172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
    173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
    174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
    175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
    176
    177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
    178{
    179	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
    180	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
    181	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
    182	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
    183	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
    184	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
    185	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
    186	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
    187	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
    188	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
    189	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
    190	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
    191	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
    192	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
    193	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
    194	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
    195};
    196
    197static const u32 golden_settings_tonga_a11[] =
    198{
    199	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
    200	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
    201	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    202	mmGB_GPU_ID, 0x0000000f, 0x00000000,
    203	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    204	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
    205	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    206	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
    207	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    208	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    209	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    210	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    211	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
    212	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
    213	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
    214	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
    215};
    216
    217static const u32 tonga_golden_common_all[] =
    218{
    219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    220	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
    221	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
    222	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    223	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    224	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    225	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    226	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
    227};
    228
    229static const u32 tonga_mgcg_cgcg_init[] =
    230{
    231	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    232	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    233	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    234	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    235	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
    236	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
    237	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
    238	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    239	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    240	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    241	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    242	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    243	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    244	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    245	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    246	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    247	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    248	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    249	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    250	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    251	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    252	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    253	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
    254	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    255	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    256	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    257	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    258	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    259	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    260	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    261	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    262	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    263	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    264	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    265	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    266	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    267	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    268	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    269	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
    270	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    271	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    272	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    273	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    274	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
    275	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    276	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    277	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    278	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    279	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
    280	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    281	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    282	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    283	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    284	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    285	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    286	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    287	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    288	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    289	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
    290	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    291	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    292	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    293	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    294	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
    295	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    296	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    297	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    298	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    299	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
    300	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    301	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    302	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    303	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    304	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
    305	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
    306};
    307
    308static const u32 golden_settings_vegam_a11[] =
    309{
    310	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
    311	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
    312	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    313	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    314	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    315	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    316	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
    317	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
    318	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    319	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
    320	mmSQ_CONFIG, 0x07f80000, 0x01180000,
    321	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    322	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    323	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
    324	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    325	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
    326	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
    327};
    328
    329static const u32 vegam_golden_common_all[] =
    330{
    331	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    332	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    333	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    334	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    335	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    336	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
    337};
    338
    339static const u32 golden_settings_polaris11_a11[] =
    340{
    341	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
    342	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
    343	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    344	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    345	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    346	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    347	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
    348	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
    349	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    350	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
    351	mmSQ_CONFIG, 0x07f80000, 0x01180000,
    352	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    353	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    354	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
    355	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    356	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
    357	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
    358};
    359
    360static const u32 polaris11_golden_common_all[] =
    361{
    362	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    363	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
    364	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    365	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    366	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    367	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
    368};
    369
    370static const u32 golden_settings_polaris10_a11[] =
    371{
    372	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
    373	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
    374	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
    375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    379	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
    380	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
    381	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    382	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
    383	mmSQ_CONFIG, 0x07f80000, 0x07180000,
    384	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    385	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    386	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
    387	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    388	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
    389};
    390
    391static const u32 polaris10_golden_common_all[] =
    392{
    393	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    394	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
    395	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
    396	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    397	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    398	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    399	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    400	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
    401};
    402
    403static const u32 fiji_golden_common_all[] =
    404{
    405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    406	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
    407	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
    408	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    409	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    410	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    411	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    412	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
    413	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    414	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
    415};
    416
    417static const u32 golden_settings_fiji_a10[] =
    418{
    419	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    421	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    422	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    423	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    425	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    426	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    427	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    428	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
    429	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
    430};
    431
    432static const u32 fiji_mgcg_cgcg_init[] =
    433{
    434	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    435	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    436	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    437	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    438	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
    439	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
    440	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
    441	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    442	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    443	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    444	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    445	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    446	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    447	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    448	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    449	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    450	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    451	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    452	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    453	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    454	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    455	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    456	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
    457	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    458	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    459	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    460	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    461	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    462	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    463	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    464	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    465	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    466	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    467	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
    468	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
    469};
    470
    471static const u32 golden_settings_iceland_a11[] =
    472{
    473	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
    474	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    475	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
    476	mmGB_GPU_ID, 0x0000000f, 0x00000000,
    477	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    478	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    479	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
    480	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
    481	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
    482	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    483	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    484	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    485	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    486	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
    487	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    488	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
    489};
    490
    491static const u32 iceland_golden_common_all[] =
    492{
    493	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    494	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
    495	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
    496	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
    497	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    498	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    499	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    500	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
    501};
    502
    503static const u32 iceland_mgcg_cgcg_init[] =
    504{
    505	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    506	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    507	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    508	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    509	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
    510	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
    511	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
    512	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    513	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    514	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    515	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    516	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    517	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    518	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    519	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    520	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    521	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    522	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    523	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    524	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    525	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    526	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    527	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
    528	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    529	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    530	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    531	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    532	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    533	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    534	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    535	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    536	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    537	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    538	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
    539	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    540	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    541	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    542	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    543	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
    544	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    545	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    546	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    547	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    548	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
    549	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    550	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    551	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    552	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    553	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
    554	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    555	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    556	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    557	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    558	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
    559	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    560	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    561	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    562	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    563	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
    564	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    565	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    566	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    567	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    568	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
    569};
    570
    571static const u32 cz_golden_settings_a11[] =
    572{
    573	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
    574	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    575	mmGB_GPU_ID, 0x0000000f, 0x00000000,
    576	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
    577	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    578	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
    579	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    580	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
    581	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    582	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    583	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
    584	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
    585};
    586
    587static const u32 cz_golden_common_all[] =
    588{
    589	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    590	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
    591	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
    592	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
    593	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    594	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    595	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    596	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
    597};
    598
    599static const u32 cz_mgcg_cgcg_init[] =
    600{
    601	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    602	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    603	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    604	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    605	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
    606	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
    607	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
    608	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    609	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    610	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    611	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    612	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    613	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    614	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    615	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    616	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    617	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    618	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    619	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    620	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    621	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    622	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    623	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
    624	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    625	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    626	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    627	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    628	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    629	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    630	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    631	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    632	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    633	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    634	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    635	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    636	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    637	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    638	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    639	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
    640	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    641	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    642	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    643	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    644	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
    645	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    646	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    647	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    648	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    649	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
    650	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    651	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    652	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    653	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    654	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    655	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    656	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    657	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    658	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    659	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
    660	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    661	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    662	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    663	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    664	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
    665	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    666	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    667	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    668	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    669	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
    670	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    671	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    672	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    673	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    674	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
    675	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
    676};
    677
/*
 * Golden register settings for Stoney.  The table is handed to
 * amdgpu_device_program_register_sequence() by
 * gfx_v8_0_init_golden_registers() in the CHIP_STONEY case.
 * NOTE(review): rows look like (register, mask, value) triplets -- confirm
 * against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
    691
/*
 * Common golden register values for Stoney; programmed last of the three
 * Stoney tables by gfx_v8_0_init_golden_registers().
 * NOTE(review): rows look like (register, mask, value) triplets -- confirm
 * against amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
    703
/*
 * Stoney register table programmed first by
 * gfx_v8_0_init_golden_registers() in the CHIP_STONEY case.
 * NOTE(review): the name suggests MGCG/CGCG clock-gating init values and the
 * rows look like (register, mask, value) triplets -- confirm.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
    712
    713
/*
 * Human-readable descriptions of the SQ EDC error sources, one string per
 * source.
 * NOTE(review): presumably indexed by the SOURCE field read from SQ_EDC_INFO;
 * the use site is not visible here -- confirm the index range there.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
    723
/* Forward declarations of file-local (static) helpers. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

/*
 * Mask and shift of the ACLK_DIVIDER field of CG_ACLK_CNTL; used when the
 * Polaris10 golden registers are programmed.
 */
#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
    735
    736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
    737{
    738	uint32_t data;
    739
    740	switch (adev->asic_type) {
    741	case CHIP_TOPAZ:
    742		amdgpu_device_program_register_sequence(adev,
    743							iceland_mgcg_cgcg_init,
    744							ARRAY_SIZE(iceland_mgcg_cgcg_init));
    745		amdgpu_device_program_register_sequence(adev,
    746							golden_settings_iceland_a11,
    747							ARRAY_SIZE(golden_settings_iceland_a11));
    748		amdgpu_device_program_register_sequence(adev,
    749							iceland_golden_common_all,
    750							ARRAY_SIZE(iceland_golden_common_all));
    751		break;
    752	case CHIP_FIJI:
    753		amdgpu_device_program_register_sequence(adev,
    754							fiji_mgcg_cgcg_init,
    755							ARRAY_SIZE(fiji_mgcg_cgcg_init));
    756		amdgpu_device_program_register_sequence(adev,
    757							golden_settings_fiji_a10,
    758							ARRAY_SIZE(golden_settings_fiji_a10));
    759		amdgpu_device_program_register_sequence(adev,
    760							fiji_golden_common_all,
    761							ARRAY_SIZE(fiji_golden_common_all));
    762		break;
    763
    764	case CHIP_TONGA:
    765		amdgpu_device_program_register_sequence(adev,
    766							tonga_mgcg_cgcg_init,
    767							ARRAY_SIZE(tonga_mgcg_cgcg_init));
    768		amdgpu_device_program_register_sequence(adev,
    769							golden_settings_tonga_a11,
    770							ARRAY_SIZE(golden_settings_tonga_a11));
    771		amdgpu_device_program_register_sequence(adev,
    772							tonga_golden_common_all,
    773							ARRAY_SIZE(tonga_golden_common_all));
    774		break;
    775	case CHIP_VEGAM:
    776		amdgpu_device_program_register_sequence(adev,
    777							golden_settings_vegam_a11,
    778							ARRAY_SIZE(golden_settings_vegam_a11));
    779		amdgpu_device_program_register_sequence(adev,
    780							vegam_golden_common_all,
    781							ARRAY_SIZE(vegam_golden_common_all));
    782		break;
    783	case CHIP_POLARIS11:
    784	case CHIP_POLARIS12:
    785		amdgpu_device_program_register_sequence(adev,
    786							golden_settings_polaris11_a11,
    787							ARRAY_SIZE(golden_settings_polaris11_a11));
    788		amdgpu_device_program_register_sequence(adev,
    789							polaris11_golden_common_all,
    790							ARRAY_SIZE(polaris11_golden_common_all));
    791		break;
    792	case CHIP_POLARIS10:
    793		amdgpu_device_program_register_sequence(adev,
    794							golden_settings_polaris10_a11,
    795							ARRAY_SIZE(golden_settings_polaris10_a11));
    796		amdgpu_device_program_register_sequence(adev,
    797							polaris10_golden_common_all,
    798							ARRAY_SIZE(polaris10_golden_common_all));
    799		data = RREG32_SMC(ixCG_ACLK_CNTL);
    800		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
    801		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
    802		WREG32_SMC(ixCG_ACLK_CNTL, data);
    803		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
    804		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
    805		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
    806		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
    807			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
    808			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
    809		}
    810		break;
    811	case CHIP_CARRIZO:
    812		amdgpu_device_program_register_sequence(adev,
    813							cz_mgcg_cgcg_init,
    814							ARRAY_SIZE(cz_mgcg_cgcg_init));
    815		amdgpu_device_program_register_sequence(adev,
    816							cz_golden_settings_a11,
    817							ARRAY_SIZE(cz_golden_settings_a11));
    818		amdgpu_device_program_register_sequence(adev,
    819							cz_golden_common_all,
    820							ARRAY_SIZE(cz_golden_common_all));
    821		break;
    822	case CHIP_STONEY:
    823		amdgpu_device_program_register_sequence(adev,
    824							stoney_mgcg_cgcg_init,
    825							ARRAY_SIZE(stoney_mgcg_cgcg_init));
    826		amdgpu_device_program_register_sequence(adev,
    827							stoney_golden_settings_a11,
    828							ARRAY_SIZE(stoney_golden_settings_a11));
    829		amdgpu_device_program_register_sequence(adev,
    830							stoney_golden_common_all,
    831							ARRAY_SIZE(stoney_golden_common_all));
    832		break;
    833	default:
    834		break;
    835	}
    836}
    837
    838static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
    839{
    840	struct amdgpu_device *adev = ring->adev;
    841	uint32_t tmp = 0;
    842	unsigned i;
    843	int r;
    844
    845	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
    846	r = amdgpu_ring_alloc(ring, 3);
    847	if (r)
    848		return r;
    849
    850	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
    851	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
    852	amdgpu_ring_write(ring, 0xDEADBEEF);
    853	amdgpu_ring_commit(ring);
    854
    855	for (i = 0; i < adev->usec_timeout; i++) {
    856		tmp = RREG32(mmSCRATCH_REG0);
    857		if (tmp == 0xDEADBEEF)
    858			break;
    859		udelay(1);
    860	}
    861
    862	if (i >= adev->usec_timeout)
    863		r = -ETIMEDOUT;
    864
    865	return r;
    866}
    867
    868static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
    869{
    870	struct amdgpu_device *adev = ring->adev;
    871	struct amdgpu_ib ib;
    872	struct dma_fence *f = NULL;
    873
    874	unsigned int index;
    875	uint64_t gpu_addr;
    876	uint32_t tmp;
    877	long r;
    878
    879	r = amdgpu_device_wb_get(adev, &index);
    880	if (r)
    881		return r;
    882
    883	gpu_addr = adev->wb.gpu_addr + (index * 4);
    884	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
    885	memset(&ib, 0, sizeof(ib));
    886	r = amdgpu_ib_get(adev, NULL, 16,
    887					AMDGPU_IB_POOL_DIRECT, &ib);
    888	if (r)
    889		goto err1;
    890
    891	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
    892	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
    893	ib.ptr[2] = lower_32_bits(gpu_addr);
    894	ib.ptr[3] = upper_32_bits(gpu_addr);
    895	ib.ptr[4] = 0xDEADBEEF;
    896	ib.length_dw = 5;
    897
    898	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
    899	if (r)
    900		goto err2;
    901
    902	r = dma_fence_wait_timeout(f, false, timeout);
    903	if (r == 0) {
    904		r = -ETIMEDOUT;
    905		goto err2;
    906	} else if (r < 0) {
    907		goto err2;
    908	}
    909
    910	tmp = adev->wb.wb[index];
    911	if (tmp == 0xDEADBEEF)
    912		r = 0;
    913	else
    914		r = -EINVAL;
    915
    916err2:
    917	amdgpu_ib_free(adev, &ib, NULL);
    918	dma_fence_put(f);
    919err1:
    920	amdgpu_device_wb_free(adev, index);
    921	return r;
    922}
    923
    924
    925static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
    926{
    927	release_firmware(adev->gfx.pfp_fw);
    928	adev->gfx.pfp_fw = NULL;
    929	release_firmware(adev->gfx.me_fw);
    930	adev->gfx.me_fw = NULL;
    931	release_firmware(adev->gfx.ce_fw);
    932	adev->gfx.ce_fw = NULL;
    933	release_firmware(adev->gfx.rlc_fw);
    934	adev->gfx.rlc_fw = NULL;
    935	release_firmware(adev->gfx.mec_fw);
    936	adev->gfx.mec_fw = NULL;
    937	if ((adev->asic_type != CHIP_STONEY) &&
    938	    (adev->asic_type != CHIP_TOPAZ))
    939		release_firmware(adev->gfx.mec2_fw);
    940	adev->gfx.mec2_fw = NULL;
    941
    942	kfree(adev->gfx.rlc.register_list_format);
    943}
    944
    945static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
    946{
    947	const char *chip_name;
    948	char fw_name[30];
    949	int err;
    950	struct amdgpu_firmware_info *info = NULL;
    951	const struct common_firmware_header *header = NULL;
    952	const struct gfx_firmware_header_v1_0 *cp_hdr;
    953	const struct rlc_firmware_header_v2_0 *rlc_hdr;
    954	unsigned int *tmp = NULL, i;
    955
    956	DRM_DEBUG("\n");
    957
    958	switch (adev->asic_type) {
    959	case CHIP_TOPAZ:
    960		chip_name = "topaz";
    961		break;
    962	case CHIP_TONGA:
    963		chip_name = "tonga";
    964		break;
    965	case CHIP_CARRIZO:
    966		chip_name = "carrizo";
    967		break;
    968	case CHIP_FIJI:
    969		chip_name = "fiji";
    970		break;
    971	case CHIP_STONEY:
    972		chip_name = "stoney";
    973		break;
    974	case CHIP_POLARIS10:
    975		chip_name = "polaris10";
    976		break;
    977	case CHIP_POLARIS11:
    978		chip_name = "polaris11";
    979		break;
    980	case CHIP_POLARIS12:
    981		chip_name = "polaris12";
    982		break;
    983	case CHIP_VEGAM:
    984		chip_name = "vegam";
    985		break;
    986	default:
    987		BUG();
    988	}
    989
    990	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
    991		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
    992		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
    993		if (err == -ENOENT) {
    994			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
    995			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
    996		}
    997	} else {
    998		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
    999		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
   1000	}
   1001	if (err)
   1002		goto out;
   1003	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
   1004	if (err)
   1005		goto out;
   1006	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
   1007	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1008	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1009
   1010	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
   1011		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
   1012		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
   1013		if (err == -ENOENT) {
   1014			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
   1015			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
   1016		}
   1017	} else {
   1018		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
   1019		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
   1020	}
   1021	if (err)
   1022		goto out;
   1023	err = amdgpu_ucode_validate(adev->gfx.me_fw);
   1024	if (err)
   1025		goto out;
   1026	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
   1027	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1028
   1029	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1030
   1031	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
   1032		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
   1033		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
   1034		if (err == -ENOENT) {
   1035			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
   1036			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
   1037		}
   1038	} else {
   1039		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
   1040		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
   1041	}
   1042	if (err)
   1043		goto out;
   1044	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
   1045	if (err)
   1046		goto out;
   1047	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
   1048	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1049	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1050
   1051	/*
   1052	 * Support for MCBP/Virtualization in combination with chained IBs is
   1053	 * formal released on feature version #46
   1054	 */
   1055	if (adev->gfx.ce_feature_version >= 46 &&
   1056	    adev->gfx.pfp_feature_version >= 46) {
   1057		adev->virt.chained_ib_support = true;
   1058		DRM_INFO("Chained IB support enabled!\n");
   1059	} else
   1060		adev->virt.chained_ib_support = false;
   1061
   1062	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
   1063	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
   1064	if (err)
   1065		goto out;
   1066	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
   1067	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
   1068	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
   1069	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
   1070
   1071	adev->gfx.rlc.save_and_restore_offset =
   1072			le32_to_cpu(rlc_hdr->save_and_restore_offset);
   1073	adev->gfx.rlc.clear_state_descriptor_offset =
   1074			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
   1075	adev->gfx.rlc.avail_scratch_ram_locations =
   1076			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
   1077	adev->gfx.rlc.reg_restore_list_size =
   1078			le32_to_cpu(rlc_hdr->reg_restore_list_size);
   1079	adev->gfx.rlc.reg_list_format_start =
   1080			le32_to_cpu(rlc_hdr->reg_list_format_start);
   1081	adev->gfx.rlc.reg_list_format_separate_start =
   1082			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
   1083	adev->gfx.rlc.starting_offsets_start =
   1084			le32_to_cpu(rlc_hdr->starting_offsets_start);
   1085	adev->gfx.rlc.reg_list_format_size_bytes =
   1086			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
   1087	adev->gfx.rlc.reg_list_size_bytes =
   1088			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
   1089
   1090	adev->gfx.rlc.register_list_format =
   1091			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
   1092					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
   1093
   1094	if (!adev->gfx.rlc.register_list_format) {
   1095		err = -ENOMEM;
   1096		goto out;
   1097	}
   1098
   1099	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
   1100			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
   1101	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
   1102		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
   1103
   1104	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
   1105
   1106	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
   1107			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
   1108	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
   1109		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
   1110
   1111	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
   1112		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
   1113		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
   1114		if (err == -ENOENT) {
   1115			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
   1116			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
   1117		}
   1118	} else {
   1119		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
   1120		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
   1121	}
   1122	if (err)
   1123		goto out;
   1124	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
   1125	if (err)
   1126		goto out;
   1127	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   1128	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
   1129	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
   1130
   1131	if ((adev->asic_type != CHIP_STONEY) &&
   1132	    (adev->asic_type != CHIP_TOPAZ)) {
   1133		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
   1134			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
   1135			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
   1136			if (err == -ENOENT) {
   1137				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
   1138				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
   1139			}
   1140		} else {
   1141			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
   1142			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
   1143		}
   1144		if (!err) {
   1145			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
   1146			if (err)
   1147				goto out;
   1148			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
   1149				adev->gfx.mec2_fw->data;
   1150			adev->gfx.mec2_fw_version =
   1151				le32_to_cpu(cp_hdr->header.ucode_version);
   1152			adev->gfx.mec2_feature_version =
   1153				le32_to_cpu(cp_hdr->ucode_feature_version);
   1154		} else {
   1155			err = 0;
   1156			adev->gfx.mec2_fw = NULL;
   1157		}
   1158	}
   1159
   1160	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
   1161	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
   1162	info->fw = adev->gfx.pfp_fw;
   1163	header = (const struct common_firmware_header *)info->fw->data;
   1164	adev->firmware.fw_size +=
   1165		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1166
   1167	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
   1168	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
   1169	info->fw = adev->gfx.me_fw;
   1170	header = (const struct common_firmware_header *)info->fw->data;
   1171	adev->firmware.fw_size +=
   1172		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1173
   1174	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
   1175	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
   1176	info->fw = adev->gfx.ce_fw;
   1177	header = (const struct common_firmware_header *)info->fw->data;
   1178	adev->firmware.fw_size +=
   1179		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1180
   1181	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
   1182	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
   1183	info->fw = adev->gfx.rlc_fw;
   1184	header = (const struct common_firmware_header *)info->fw->data;
   1185	adev->firmware.fw_size +=
   1186		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1187
   1188	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
   1189	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
   1190	info->fw = adev->gfx.mec_fw;
   1191	header = (const struct common_firmware_header *)info->fw->data;
   1192	adev->firmware.fw_size +=
   1193		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1194
   1195	/* we need account JT in */
   1196	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   1197	adev->firmware.fw_size +=
   1198		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
   1199
   1200	if (amdgpu_sriov_vf(adev)) {
   1201		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
   1202		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
   1203		info->fw = adev->gfx.mec_fw;
   1204		adev->firmware.fw_size +=
   1205			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
   1206	}
   1207
   1208	if (adev->gfx.mec2_fw) {
   1209		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
   1210		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
   1211		info->fw = adev->gfx.mec2_fw;
   1212		header = (const struct common_firmware_header *)info->fw->data;
   1213		adev->firmware.fw_size +=
   1214			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
   1215	}
   1216
   1217out:
   1218	if (err) {
   1219		dev_err(adev->dev,
   1220			"gfx8: Failed to load firmware \"%s\"\n",
   1221			fw_name);
   1222		release_firmware(adev->gfx.pfp_fw);
   1223		adev->gfx.pfp_fw = NULL;
   1224		release_firmware(adev->gfx.me_fw);
   1225		adev->gfx.me_fw = NULL;
   1226		release_firmware(adev->gfx.ce_fw);
   1227		adev->gfx.ce_fw = NULL;
   1228		release_firmware(adev->gfx.rlc_fw);
   1229		adev->gfx.rlc_fw = NULL;
   1230		release_firmware(adev->gfx.mec_fw);
   1231		adev->gfx.mec_fw = NULL;
   1232		release_firmware(adev->gfx.mec2_fw);
   1233		adev->gfx.mec2_fw = NULL;
   1234	}
   1235	return err;
   1236}
   1237
   1238static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
   1239				    volatile u32 *buffer)
   1240{
   1241	u32 count = 0, i;
   1242	const struct cs_section_def *sect = NULL;
   1243	const struct cs_extent_def *ext = NULL;
   1244
   1245	if (adev->gfx.rlc.cs_data == NULL)
   1246		return;
   1247	if (buffer == NULL)
   1248		return;
   1249
   1250	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   1251	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   1252
   1253	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   1254	buffer[count++] = cpu_to_le32(0x80000000);
   1255	buffer[count++] = cpu_to_le32(0x80000000);
   1256
   1257	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
   1258		for (ext = sect->section; ext->extent != NULL; ++ext) {
   1259			if (sect->id == SECT_CONTEXT) {
   1260				buffer[count++] =
   1261					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
   1262				buffer[count++] = cpu_to_le32(ext->reg_index -
   1263						PACKET3_SET_CONTEXT_REG_START);
   1264				for (i = 0; i < ext->reg_count; i++)
   1265					buffer[count++] = cpu_to_le32(ext->extent[i]);
   1266			} else {
   1267				return;
   1268			}
   1269		}
   1270	}
   1271
   1272	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   1273	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
   1274			PACKET3_SET_CONTEXT_REG_START);
   1275	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
   1276	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
   1277
   1278	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   1279	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
   1280
   1281	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
   1282	buffer[count++] = cpu_to_le32(0);
   1283}
   1284
   1285static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
   1286{
   1287	if (adev->asic_type == CHIP_CARRIZO)
   1288		return 5;
   1289	else
   1290		return 4;
   1291}
   1292
   1293static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
   1294{
   1295	const struct cs_section_def *cs_data;
   1296	int r;
   1297
   1298	adev->gfx.rlc.cs_data = vi_cs_data;
   1299
   1300	cs_data = adev->gfx.rlc.cs_data;
   1301
   1302	if (cs_data) {
   1303		/* init clear state block */
   1304		r = amdgpu_gfx_rlc_init_csb(adev);
   1305		if (r)
   1306			return r;
   1307	}
   1308
   1309	if ((adev->asic_type == CHIP_CARRIZO) ||
   1310	    (adev->asic_type == CHIP_STONEY)) {
   1311		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
   1312		r = amdgpu_gfx_rlc_init_cpt(adev);
   1313		if (r)
   1314			return r;
   1315	}
   1316
   1317	/* init spm vmid with 0xf */
   1318	if (adev->gfx.rlc.funcs->update_spm_vmid)
   1319		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
   1320
   1321	return 0;
   1322}
   1323
/*
 * Free the MEC HPD EOP buffer object created by gfx_v8_0_mec_init().
 * NULL is passed for the two remaining amdgpu_bo_free_kernel() arguments.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
   1328
   1329static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
   1330{
   1331	int r;
   1332	u32 *hpd;
   1333	size_t mec_hpd_size;
   1334
   1335	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
   1336
   1337	/* take ownership of the relevant compute queues */
   1338	amdgpu_gfx_compute_queue_acquire(adev);
   1339
   1340	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
   1341	if (mec_hpd_size) {
   1342		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
   1343					      AMDGPU_GEM_DOMAIN_VRAM,
   1344					      &adev->gfx.mec.hpd_eop_obj,
   1345					      &adev->gfx.mec.hpd_eop_gpu_addr,
   1346					      (void **)&hpd);
   1347		if (r) {
   1348			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
   1349			return r;
   1350		}
   1351
   1352		memset(hpd, 0, mec_hpd_size);
   1353
   1354		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
   1355		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
   1356	}
   1357
   1358	return 0;
   1359}
   1360
/*
 * Raw 32-bit words of a compute shader image.
 * gfx_v8_0_do_edc_gpr_workarounds() reserves sizeof() this array in its IB.
 * NOTE(review): judging by the name this shader initialises the VGPRs for
 * the EDC workaround -- confirm against the shader source.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
   1397
/*
 * Raw 32-bit words of a second compute shader image.
 * NOTE(review): judging by the name this shader initialises the SGPRs for
 * the EDC workaround; its use site is not visible in this chunk -- confirm.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
   1422
/*
 * Compute register setup for the VGPR init dispatch.
 * gfx_v8_0_do_edc_gpr_workarounds() sizes its IB from
 * ARRAY_SIZE(vgpr_init_regs) / 2, i.e. the entries are consumed in pairs.
 * NOTE(review): each pair looks like (register offset, value) -- confirm at
 * the point where the IB is built.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
   1443
/*
 * Compute register setup for the first SGPR init dispatch; it differs from
 * sgpr2_init_regs only in the COMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f).
 * gfx_v8_0_do_edc_gpr_workarounds() sizes its IB from
 * ARRAY_SIZE(sgpr1_init_regs) / 2, i.e. the entries are consumed in pairs.
 * NOTE(review): each pair looks like (register offset, value) -- confirm.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
   1464
/*
 * Compute register setup for the second SGPR init dispatch; it differs from
 * sgpr1_init_regs only in the COMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0).
 * gfx_v8_0_do_edc_gpr_workarounds() sizes its IB from
 * ARRAY_SIZE(sgpr2_init_regs) / 2, i.e. the entries are consumed in pairs.
 * NOTE(review): each pair looks like (register offset, value) -- confirm.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
   1485
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back,
 * one by one and discarding the value, after the GPR init dispatches have
 * completed. The read-back is done to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
   1514
   1515static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
   1516{
   1517	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
   1518	struct amdgpu_ib ib;
   1519	struct dma_fence *f = NULL;
   1520	int r, i;
   1521	u32 tmp;
   1522	unsigned total_size, vgpr_offset, sgpr_offset;
   1523	u64 gpu_addr;
   1524
   1525	/* only supported on CZ */
   1526	if (adev->asic_type != CHIP_CARRIZO)
   1527		return 0;
   1528
   1529	/* bail if the compute ring is not ready */
   1530	if (!ring->sched.ready)
   1531		return 0;
   1532
   1533	tmp = RREG32(mmGB_EDC_MODE);
   1534	WREG32(mmGB_EDC_MODE, 0);
   1535
   1536	total_size =
   1537		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
   1538	total_size +=
   1539		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
   1540	total_size +=
   1541		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
   1542	total_size = ALIGN(total_size, 256);
   1543	vgpr_offset = total_size;
   1544	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
   1545	sgpr_offset = total_size;
   1546	total_size += sizeof(sgpr_init_compute_shader);
   1547
   1548	/* allocate an indirect buffer to put the commands in */
   1549	memset(&ib, 0, sizeof(ib));
   1550	r = amdgpu_ib_get(adev, NULL, total_size,
   1551					AMDGPU_IB_POOL_DIRECT, &ib);
   1552	if (r) {
   1553		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
   1554		return r;
   1555	}
   1556
   1557	/* load the compute shaders */
   1558	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
   1559		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
   1560
   1561	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
   1562		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
   1563
   1564	/* init the ib length to 0 */
   1565	ib.length_dw = 0;
   1566
   1567	/* VGPR */
   1568	/* write the register state for the compute dispatch */
   1569	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
   1570		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
   1571		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
   1572		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
   1573	}
   1574	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
   1575	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
   1576	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
   1577	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
   1578	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
   1579	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
   1580
   1581	/* write dispatch packet */
   1582	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   1583	ib.ptr[ib.length_dw++] = 8; /* x */
   1584	ib.ptr[ib.length_dw++] = 1; /* y */
   1585	ib.ptr[ib.length_dw++] = 1; /* z */
   1586	ib.ptr[ib.length_dw++] =
   1587		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
   1588
   1589	/* write CS partial flush packet */
   1590	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
   1591	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
   1592
   1593	/* SGPR1 */
   1594	/* write the register state for the compute dispatch */
   1595	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
   1596		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
   1597		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
   1598		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
   1599	}
   1600	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
   1601	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
   1602	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
   1603	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
   1604	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
   1605	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
   1606
   1607	/* write dispatch packet */
   1608	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   1609	ib.ptr[ib.length_dw++] = 8; /* x */
   1610	ib.ptr[ib.length_dw++] = 1; /* y */
   1611	ib.ptr[ib.length_dw++] = 1; /* z */
   1612	ib.ptr[ib.length_dw++] =
   1613		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
   1614
   1615	/* write CS partial flush packet */
   1616	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
   1617	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
   1618
   1619	/* SGPR2 */
   1620	/* write the register state for the compute dispatch */
   1621	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
   1622		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
   1623		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
   1624		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
   1625	}
   1626	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
   1627	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
   1628	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
   1629	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
   1630	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
   1631	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
   1632
   1633	/* write dispatch packet */
   1634	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
   1635	ib.ptr[ib.length_dw++] = 8; /* x */
   1636	ib.ptr[ib.length_dw++] = 1; /* y */
   1637	ib.ptr[ib.length_dw++] = 1; /* z */
   1638	ib.ptr[ib.length_dw++] =
   1639		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
   1640
   1641	/* write CS partial flush packet */
   1642	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
   1643	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
   1644
   1645	/* shedule the ib on the ring */
   1646	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
   1647	if (r) {
   1648		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
   1649		goto fail;
   1650	}
   1651
   1652	/* wait for the GPU to finish processing the IB */
   1653	r = dma_fence_wait(f, false);
   1654	if (r) {
   1655		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
   1656		goto fail;
   1657	}
   1658
   1659	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
   1660	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
   1661	WREG32(mmGB_EDC_MODE, tmp);
   1662
   1663	tmp = RREG32(mmCC_GC_EDC_CONFIG);
   1664	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
   1665	WREG32(mmCC_GC_EDC_CONFIG, tmp);
   1666
   1667
   1668	/* read back registers to clear the counters */
   1669	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
   1670		RREG32(sec_ded_counter_registers[i]);
   1671
   1672fail:
   1673	amdgpu_ib_free(adev, &ib, NULL);
   1674	dma_fence_put(f);
   1675
   1676	return r;
   1677}
   1678
   1679static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
   1680{
   1681	u32 gb_addr_config;
   1682	u32 mc_arb_ramcfg;
   1683	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
   1684	u32 tmp;
   1685	int ret;
   1686
   1687	switch (adev->asic_type) {
   1688	case CHIP_TOPAZ:
   1689		adev->gfx.config.max_shader_engines = 1;
   1690		adev->gfx.config.max_tile_pipes = 2;
   1691		adev->gfx.config.max_cu_per_sh = 6;
   1692		adev->gfx.config.max_sh_per_se = 1;
   1693		adev->gfx.config.max_backends_per_se = 2;
   1694		adev->gfx.config.max_texture_channel_caches = 2;
   1695		adev->gfx.config.max_gprs = 256;
   1696		adev->gfx.config.max_gs_threads = 32;
   1697		adev->gfx.config.max_hw_contexts = 8;
   1698
   1699		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1700		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1701		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1702		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1703		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
   1704		break;
   1705	case CHIP_FIJI:
   1706		adev->gfx.config.max_shader_engines = 4;
   1707		adev->gfx.config.max_tile_pipes = 16;
   1708		adev->gfx.config.max_cu_per_sh = 16;
   1709		adev->gfx.config.max_sh_per_se = 1;
   1710		adev->gfx.config.max_backends_per_se = 4;
   1711		adev->gfx.config.max_texture_channel_caches = 16;
   1712		adev->gfx.config.max_gprs = 256;
   1713		adev->gfx.config.max_gs_threads = 32;
   1714		adev->gfx.config.max_hw_contexts = 8;
   1715
   1716		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1717		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1718		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1719		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1720		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
   1721		break;
   1722	case CHIP_POLARIS11:
   1723	case CHIP_POLARIS12:
   1724		ret = amdgpu_atombios_get_gfx_info(adev);
   1725		if (ret)
   1726			return ret;
   1727		adev->gfx.config.max_gprs = 256;
   1728		adev->gfx.config.max_gs_threads = 32;
   1729		adev->gfx.config.max_hw_contexts = 8;
   1730
   1731		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1732		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1733		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1734		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1735		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
   1736		break;
   1737	case CHIP_POLARIS10:
   1738	case CHIP_VEGAM:
   1739		ret = amdgpu_atombios_get_gfx_info(adev);
   1740		if (ret)
   1741			return ret;
   1742		adev->gfx.config.max_gprs = 256;
   1743		adev->gfx.config.max_gs_threads = 32;
   1744		adev->gfx.config.max_hw_contexts = 8;
   1745
   1746		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1747		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1748		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1749		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1750		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
   1751		break;
   1752	case CHIP_TONGA:
   1753		adev->gfx.config.max_shader_engines = 4;
   1754		adev->gfx.config.max_tile_pipes = 8;
   1755		adev->gfx.config.max_cu_per_sh = 8;
   1756		adev->gfx.config.max_sh_per_se = 1;
   1757		adev->gfx.config.max_backends_per_se = 2;
   1758		adev->gfx.config.max_texture_channel_caches = 8;
   1759		adev->gfx.config.max_gprs = 256;
   1760		adev->gfx.config.max_gs_threads = 32;
   1761		adev->gfx.config.max_hw_contexts = 8;
   1762
   1763		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1764		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1765		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1766		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1767		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
   1768		break;
   1769	case CHIP_CARRIZO:
   1770		adev->gfx.config.max_shader_engines = 1;
   1771		adev->gfx.config.max_tile_pipes = 2;
   1772		adev->gfx.config.max_sh_per_se = 1;
   1773		adev->gfx.config.max_backends_per_se = 2;
   1774		adev->gfx.config.max_cu_per_sh = 8;
   1775		adev->gfx.config.max_texture_channel_caches = 2;
   1776		adev->gfx.config.max_gprs = 256;
   1777		adev->gfx.config.max_gs_threads = 32;
   1778		adev->gfx.config.max_hw_contexts = 8;
   1779
   1780		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1781		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1782		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1783		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1784		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
   1785		break;
   1786	case CHIP_STONEY:
   1787		adev->gfx.config.max_shader_engines = 1;
   1788		adev->gfx.config.max_tile_pipes = 2;
   1789		adev->gfx.config.max_sh_per_se = 1;
   1790		adev->gfx.config.max_backends_per_se = 1;
   1791		adev->gfx.config.max_cu_per_sh = 3;
   1792		adev->gfx.config.max_texture_channel_caches = 2;
   1793		adev->gfx.config.max_gprs = 256;
   1794		adev->gfx.config.max_gs_threads = 16;
   1795		adev->gfx.config.max_hw_contexts = 8;
   1796
   1797		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1798		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1799		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1800		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1801		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
   1802		break;
   1803	default:
   1804		adev->gfx.config.max_shader_engines = 2;
   1805		adev->gfx.config.max_tile_pipes = 4;
   1806		adev->gfx.config.max_cu_per_sh = 2;
   1807		adev->gfx.config.max_sh_per_se = 1;
   1808		adev->gfx.config.max_backends_per_se = 2;
   1809		adev->gfx.config.max_texture_channel_caches = 4;
   1810		adev->gfx.config.max_gprs = 256;
   1811		adev->gfx.config.max_gs_threads = 32;
   1812		adev->gfx.config.max_hw_contexts = 8;
   1813
   1814		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   1815		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   1816		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   1817		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   1818		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
   1819		break;
   1820	}
   1821
   1822	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
   1823	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
   1824
   1825	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
   1826				MC_ARB_RAMCFG, NOOFBANK);
   1827	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
   1828				MC_ARB_RAMCFG, NOOFRANKS);
   1829
   1830	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
   1831	adev->gfx.config.mem_max_burst_length_bytes = 256;
   1832	if (adev->flags & AMD_IS_APU) {
   1833		/* Get memory bank mapping mode. */
   1834		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
   1835		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
   1836		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
   1837
   1838		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
   1839		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
   1840		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
   1841
   1842		/* Validate settings in case only one DIMM installed. */
   1843		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
   1844			dimm00_addr_map = 0;
   1845		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
   1846			dimm01_addr_map = 0;
   1847		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
   1848			dimm10_addr_map = 0;
   1849		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
   1850			dimm11_addr_map = 0;
   1851
   1852		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
   1853		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
   1854		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
   1855			adev->gfx.config.mem_row_size_in_kb = 2;
   1856		else
   1857			adev->gfx.config.mem_row_size_in_kb = 1;
   1858	} else {
   1859		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
   1860		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
   1861		if (adev->gfx.config.mem_row_size_in_kb > 4)
   1862			adev->gfx.config.mem_row_size_in_kb = 4;
   1863	}
   1864
   1865	adev->gfx.config.shader_engine_tile_size = 32;
   1866	adev->gfx.config.num_gpus = 1;
   1867	adev->gfx.config.multi_gpu_tile_size = 64;
   1868
   1869	/* fix up row size */
   1870	switch (adev->gfx.config.mem_row_size_in_kb) {
   1871	case 1:
   1872	default:
   1873		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
   1874		break;
   1875	case 2:
   1876		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
   1877		break;
   1878	case 4:
   1879		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
   1880		break;
   1881	}
   1882	adev->gfx.config.gb_addr_config = gb_addr_config;
   1883
   1884	return 0;
   1885}
   1886
   1887static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
   1888					int mec, int pipe, int queue)
   1889{
   1890	int r;
   1891	unsigned irq_type;
   1892	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
   1893	unsigned int hw_prio;
   1894
   1895	ring = &adev->gfx.compute_ring[ring_id];
   1896
   1897	/* mec0 is me1 */
   1898	ring->me = mec + 1;
   1899	ring->pipe = pipe;
   1900	ring->queue = queue;
   1901
   1902	ring->ring_obj = NULL;
   1903	ring->use_doorbell = true;
   1904	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
   1905	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
   1906				+ (ring_id * GFX8_MEC_HPD_SIZE);
   1907	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
   1908
   1909	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
   1910		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
   1911		+ ring->pipe;
   1912
   1913	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
   1914			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
   1915	/* type-2 packets are deprecated on MEC, use type-3 instead */
   1916	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
   1917			     hw_prio, NULL);
   1918	if (r)
   1919		return r;
   1920
   1921
   1922	return 0;
   1923}
   1924
   1925static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
   1926
   1927static int gfx_v8_0_sw_init(void *handle)
   1928{
   1929	int i, j, k, r, ring_id;
   1930	struct amdgpu_ring *ring;
   1931	struct amdgpu_kiq *kiq;
   1932	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   1933
   1934	switch (adev->asic_type) {
   1935	case CHIP_TONGA:
   1936	case CHIP_CARRIZO:
   1937	case CHIP_FIJI:
   1938	case CHIP_POLARIS10:
   1939	case CHIP_POLARIS11:
   1940	case CHIP_POLARIS12:
   1941	case CHIP_VEGAM:
   1942		adev->gfx.mec.num_mec = 2;
   1943		break;
   1944	case CHIP_TOPAZ:
   1945	case CHIP_STONEY:
   1946	default:
   1947		adev->gfx.mec.num_mec = 1;
   1948		break;
   1949	}
   1950
   1951	adev->gfx.mec.num_pipe_per_mec = 4;
   1952	adev->gfx.mec.num_queue_per_pipe = 8;
   1953
   1954	/* EOP Event */
   1955	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
   1956	if (r)
   1957		return r;
   1958
   1959	/* Privileged reg */
   1960	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
   1961			      &adev->gfx.priv_reg_irq);
   1962	if (r)
   1963		return r;
   1964
   1965	/* Privileged inst */
   1966	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
   1967			      &adev->gfx.priv_inst_irq);
   1968	if (r)
   1969		return r;
   1970
   1971	/* Add CP EDC/ECC irq  */
   1972	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
   1973			      &adev->gfx.cp_ecc_error_irq);
   1974	if (r)
   1975		return r;
   1976
   1977	/* SQ interrupts. */
   1978	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
   1979			      &adev->gfx.sq_irq);
   1980	if (r) {
   1981		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
   1982		return r;
   1983	}
   1984
   1985	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
   1986
   1987	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
   1988
   1989	r = gfx_v8_0_init_microcode(adev);
   1990	if (r) {
   1991		DRM_ERROR("Failed to load gfx firmware!\n");
   1992		return r;
   1993	}
   1994
   1995	r = adev->gfx.rlc.funcs->init(adev);
   1996	if (r) {
   1997		DRM_ERROR("Failed to init rlc BOs!\n");
   1998		return r;
   1999	}
   2000
   2001	r = gfx_v8_0_mec_init(adev);
   2002	if (r) {
   2003		DRM_ERROR("Failed to init MEC BOs!\n");
   2004		return r;
   2005	}
   2006
   2007	/* set up the gfx ring */
   2008	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   2009		ring = &adev->gfx.gfx_ring[i];
   2010		ring->ring_obj = NULL;
   2011		sprintf(ring->name, "gfx");
   2012		/* no gfx doorbells on iceland */
   2013		if (adev->asic_type != CHIP_TOPAZ) {
   2014			ring->use_doorbell = true;
   2015			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
   2016		}
   2017
   2018		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
   2019				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
   2020				     AMDGPU_RING_PRIO_DEFAULT, NULL);
   2021		if (r)
   2022			return r;
   2023	}
   2024
   2025
   2026	/* set up the compute queues - allocate horizontally across pipes */
   2027	ring_id = 0;
   2028	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
   2029		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
   2030			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
   2031				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
   2032					continue;
   2033
   2034				r = gfx_v8_0_compute_ring_init(adev,
   2035								ring_id,
   2036								i, k, j);
   2037				if (r)
   2038					return r;
   2039
   2040				ring_id++;
   2041			}
   2042		}
   2043	}
   2044
   2045	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
   2046	if (r) {
   2047		DRM_ERROR("Failed to init KIQ BOs!\n");
   2048		return r;
   2049	}
   2050
   2051	kiq = &adev->gfx.kiq;
   2052	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
   2053	if (r)
   2054		return r;
   2055
   2056	/* create MQD for all compute queues as well as KIQ for SRIOV case */
   2057	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
   2058	if (r)
   2059		return r;
   2060
   2061	adev->gfx.ce_ram_size = 0x8000;
   2062
   2063	r = gfx_v8_0_gpu_early_init(adev);
   2064	if (r)
   2065		return r;
   2066
   2067	return 0;
   2068}
   2069
   2070static int gfx_v8_0_sw_fini(void *handle)
   2071{
   2072	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   2073	int i;
   2074
   2075	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   2076		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
   2077	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   2078		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
   2079
   2080	amdgpu_gfx_mqd_sw_fini(adev);
   2081	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
   2082	amdgpu_gfx_kiq_fini(adev);
   2083
   2084	gfx_v8_0_mec_fini(adev);
   2085	amdgpu_gfx_rlc_fini(adev);
   2086	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
   2087				&adev->gfx.rlc.clear_state_gpu_addr,
   2088				(void **)&adev->gfx.rlc.cs_ptr);
   2089	if ((adev->asic_type == CHIP_CARRIZO) ||
   2090	    (adev->asic_type == CHIP_STONEY)) {
   2091		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
   2092				&adev->gfx.rlc.cp_table_gpu_addr,
   2093				(void **)&adev->gfx.rlc.cp_table_ptr);
   2094	}
   2095	gfx_v8_0_free_microcode(adev);
   2096
   2097	return 0;
   2098}
   2099
   2100static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
   2101{
   2102	uint32_t *modearray, *mod2array;
   2103	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
   2104	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
   2105	u32 reg_offset;
   2106
   2107	modearray = adev->gfx.config.tile_mode_array;
   2108	mod2array = adev->gfx.config.macrotile_mode_array;
   2109
   2110	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2111		modearray[reg_offset] = 0;
   2112
   2113	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
   2114		mod2array[reg_offset] = 0;
   2115
   2116	switch (adev->asic_type) {
   2117	case CHIP_TOPAZ:
   2118		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2119				PIPE_CONFIG(ADDR_SURF_P2) |
   2120				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   2121				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2122		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2123				PIPE_CONFIG(ADDR_SURF_P2) |
   2124				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   2125				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2126		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2127				PIPE_CONFIG(ADDR_SURF_P2) |
   2128				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   2129				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2130		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2131				PIPE_CONFIG(ADDR_SURF_P2) |
   2132				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   2133				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2134		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2135				PIPE_CONFIG(ADDR_SURF_P2) |
   2136				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2137				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2138		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2139				PIPE_CONFIG(ADDR_SURF_P2) |
   2140				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2141				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2142		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2143				PIPE_CONFIG(ADDR_SURF_P2) |
   2144				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2145				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2146		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2147				PIPE_CONFIG(ADDR_SURF_P2));
   2148		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2149				PIPE_CONFIG(ADDR_SURF_P2) |
   2150				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2151				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2152		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2153				 PIPE_CONFIG(ADDR_SURF_P2) |
   2154				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2155				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2156		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2157				 PIPE_CONFIG(ADDR_SURF_P2) |
   2158				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2159				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2160		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2161				 PIPE_CONFIG(ADDR_SURF_P2) |
   2162				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2163				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2164		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2165				 PIPE_CONFIG(ADDR_SURF_P2) |
   2166				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2167				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2168		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   2169				 PIPE_CONFIG(ADDR_SURF_P2) |
   2170				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2171				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2172		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2173				 PIPE_CONFIG(ADDR_SURF_P2) |
   2174				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2175				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2176		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2177				 PIPE_CONFIG(ADDR_SURF_P2) |
   2178				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2179				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2180		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2181				 PIPE_CONFIG(ADDR_SURF_P2) |
   2182				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2183				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2184		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2185				 PIPE_CONFIG(ADDR_SURF_P2) |
   2186				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2187				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2188		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   2189				 PIPE_CONFIG(ADDR_SURF_P2) |
   2190				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2191				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2192		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2193				 PIPE_CONFIG(ADDR_SURF_P2) |
   2194				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2195				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2196		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2197				 PIPE_CONFIG(ADDR_SURF_P2) |
   2198				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2199				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2200		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   2201				 PIPE_CONFIG(ADDR_SURF_P2) |
   2202				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2203				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2204		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   2205				 PIPE_CONFIG(ADDR_SURF_P2) |
   2206				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2207				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2208		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2209				 PIPE_CONFIG(ADDR_SURF_P2) |
   2210				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2211				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2212		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2213				 PIPE_CONFIG(ADDR_SURF_P2) |
   2214				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2215				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2216		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2217				 PIPE_CONFIG(ADDR_SURF_P2) |
   2218				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2219				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2220
   2221		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   2222				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2223				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2224				NUM_BANKS(ADDR_SURF_8_BANK));
   2225		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   2226				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2227				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2228				NUM_BANKS(ADDR_SURF_8_BANK));
   2229		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2230				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2231				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2232				NUM_BANKS(ADDR_SURF_8_BANK));
   2233		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2234				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2235				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2236				NUM_BANKS(ADDR_SURF_8_BANK));
   2237		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2238				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2239				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2240				NUM_BANKS(ADDR_SURF_8_BANK));
   2241		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2242				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2243				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2244				NUM_BANKS(ADDR_SURF_8_BANK));
   2245		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2246				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2247				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2248				NUM_BANKS(ADDR_SURF_8_BANK));
   2249		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   2250				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2251				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2252				NUM_BANKS(ADDR_SURF_16_BANK));
   2253		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   2254				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2255				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2256				NUM_BANKS(ADDR_SURF_16_BANK));
   2257		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2258				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2259				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2260				 NUM_BANKS(ADDR_SURF_16_BANK));
   2261		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2262				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2263				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2264				 NUM_BANKS(ADDR_SURF_16_BANK));
   2265		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2266				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2267				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2268				 NUM_BANKS(ADDR_SURF_16_BANK));
   2269		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2270				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2271				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2272				 NUM_BANKS(ADDR_SURF_16_BANK));
   2273		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2274				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2275				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2276				 NUM_BANKS(ADDR_SURF_8_BANK));
   2277
   2278		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2279			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
   2280			    reg_offset != 23)
   2281				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   2282
   2283		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2284			if (reg_offset != 7)
   2285				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   2286
   2287		break;
   2288	case CHIP_FIJI:
   2289	case CHIP_VEGAM:
   2290		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2291				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2292				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   2293				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2294		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2295				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2296				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   2297				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2298		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2299				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2300				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   2301				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2302		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2303				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2304				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   2305				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2306		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2307				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2308				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2309				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2310		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2311				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2312				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2313				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2314		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2315				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2316				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2317				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2318		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2319				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2320				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2321				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2322		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2323				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
   2324		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2325				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2326				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2327				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2328		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2329				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2330				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2332		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2333				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2334				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2336		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2337				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2338				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2340		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2341				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2342				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2344		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2345				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2346				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2348		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   2349				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2350				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2352		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2353				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2354				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2355				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2356		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2357				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2358				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2359				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2360		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2361				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2362				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2363				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2364		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2365				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2366				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2368		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2369				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2370				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2371				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2372		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   2373				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2374				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2375				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2376		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2377				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2378				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2379				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2380		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2381				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2382				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2383				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2384		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2385				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2386				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2387				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2388		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   2389				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2390				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2391				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2392		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   2393				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2394				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2395				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2396		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2397				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2398				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2399				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2400		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2401				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2402				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2403				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2404		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2405				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2406				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2407				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2408		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2409				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2410				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2411				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2412
   2413		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2414				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2415				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2416				NUM_BANKS(ADDR_SURF_8_BANK));
   2417		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2418				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2419				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2420				NUM_BANKS(ADDR_SURF_8_BANK));
   2421		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2422				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2423				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2424				NUM_BANKS(ADDR_SURF_8_BANK));
   2425		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2426				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2427				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2428				NUM_BANKS(ADDR_SURF_8_BANK));
   2429		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2430				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2431				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2432				NUM_BANKS(ADDR_SURF_8_BANK));
   2433		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2434				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2435				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2436				NUM_BANKS(ADDR_SURF_8_BANK));
   2437		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2438				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2439				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2440				NUM_BANKS(ADDR_SURF_8_BANK));
   2441		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2442				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2443				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2444				NUM_BANKS(ADDR_SURF_8_BANK));
   2445		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2446				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2447				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2448				NUM_BANKS(ADDR_SURF_8_BANK));
   2449		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2450				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2451				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2452				 NUM_BANKS(ADDR_SURF_8_BANK));
   2453		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2454				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2455				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2456				 NUM_BANKS(ADDR_SURF_8_BANK));
   2457		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2458				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2459				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2460				 NUM_BANKS(ADDR_SURF_8_BANK));
   2461		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2462				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2463				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2464				 NUM_BANKS(ADDR_SURF_8_BANK));
   2465		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2466				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2467				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2468				 NUM_BANKS(ADDR_SURF_4_BANK));
   2469
   2470		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2471			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   2472
   2473		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2474			if (reg_offset != 7)
   2475				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   2476
   2477		break;
   2478	case CHIP_TONGA:
   2479		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2480				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2481				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   2482				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2483		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2484				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2485				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   2486				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2487		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2488				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2489				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   2490				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2491		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2492				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2493				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   2494				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2495		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2496				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2497				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2498				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2499		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2500				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2501				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2502				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2503		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2504				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2505				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2506				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2507		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2508				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2509				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2510				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2511		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2512				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
   2513		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2514				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2515				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2516				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2517		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2518				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2519				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2520				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2521		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2522				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2523				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2524				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2525		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2526				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2527				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2528				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2529		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2530				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2531				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2532				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2533		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2534				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2535				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2536				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2537		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   2538				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2539				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2540				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2541		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2542				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2543				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2544				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2545		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2546				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2547				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2548				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2549		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2550				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2551				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2552				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2553		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2554				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2555				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2556				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2557		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2558				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2559				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2560				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2561		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   2562				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2563				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2564				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2565		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2566				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2567				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2568				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2569		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2570				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2571				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2572				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2573		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2574				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2575				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2576				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2577		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   2578				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2579				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2580				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2581		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   2582				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2583				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2584				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2585		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2586				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2587				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2588				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2589		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2590				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2591				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2592				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2593		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2594				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2595				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2596				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2597		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2598				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2599				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2600				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2601
   2602		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2603				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2604				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2605				NUM_BANKS(ADDR_SURF_16_BANK));
   2606		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2607				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2608				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2609				NUM_BANKS(ADDR_SURF_16_BANK));
   2610		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2611				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2612				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2613				NUM_BANKS(ADDR_SURF_16_BANK));
   2614		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2615				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2616				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2617				NUM_BANKS(ADDR_SURF_16_BANK));
   2618		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2619				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2620				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2621				NUM_BANKS(ADDR_SURF_16_BANK));
   2622		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2623				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2624				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2625				NUM_BANKS(ADDR_SURF_16_BANK));
   2626		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2627				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2628				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2629				NUM_BANKS(ADDR_SURF_16_BANK));
   2630		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2631				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2632				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2633				NUM_BANKS(ADDR_SURF_16_BANK));
   2634		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2635				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2636				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2637				NUM_BANKS(ADDR_SURF_16_BANK));
   2638		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2639				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2640				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2641				 NUM_BANKS(ADDR_SURF_16_BANK));
   2642		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2643				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2644				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2645				 NUM_BANKS(ADDR_SURF_16_BANK));
   2646		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2647				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2648				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2649				 NUM_BANKS(ADDR_SURF_8_BANK));
   2650		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2651				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2652				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2653				 NUM_BANKS(ADDR_SURF_4_BANK));
   2654		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2655				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2656				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2657				 NUM_BANKS(ADDR_SURF_4_BANK));
   2658
   2659		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2660			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   2661
   2662		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2663			if (reg_offset != 7)
   2664				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   2665
   2666		break;
   2667	case CHIP_POLARIS11:
   2668	case CHIP_POLARIS12:
   2669		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2670				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2671				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   2672				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2673		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2674				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2675				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   2676				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2677		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2678				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2679				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   2680				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2681		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2682				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2683				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   2684				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2685		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2686				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2687				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2688				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2689		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2690				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2691				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2692				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2693		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2694				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2695				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2696				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2697		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2698				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2699				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2700				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2701		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2702				PIPE_CONFIG(ADDR_SURF_P4_16x16));
   2703		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2704				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2705				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2706				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2707		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2708				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2709				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2710				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2711		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2712				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2713				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2714				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2715		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2716				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2717				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2718				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2719		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2720				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2721				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2722				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2723		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2724				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2725				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2726				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2727		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   2728				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2729				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2730				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2731		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2732				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2733				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2734				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2735		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2736				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2737				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2738				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2739		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2740				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2741				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2742				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2743		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2744				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2745				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2746				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2747		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2748				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2749				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2750				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2751		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   2752				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2753				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2754				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2755		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2756				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2757				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2758				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2759		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2760				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2761				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2762				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2763		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2764				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2765				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2766				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2767		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   2768				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2769				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2770				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2771		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   2772				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2773				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2774				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2775		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2776				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2777				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2778				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2779		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2780				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2781				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2782				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2783		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2784				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2785				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2786				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2787		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2788				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2789				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2790				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2791
   2792		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2793				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2794				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2795				NUM_BANKS(ADDR_SURF_16_BANK));
   2796
   2797		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2798				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2799				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2800				NUM_BANKS(ADDR_SURF_16_BANK));
   2801
   2802		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2803				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2804				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2805				NUM_BANKS(ADDR_SURF_16_BANK));
   2806
   2807		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2808				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2809				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2810				NUM_BANKS(ADDR_SURF_16_BANK));
   2811
   2812		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2815				NUM_BANKS(ADDR_SURF_16_BANK));
   2816
   2817		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2820				NUM_BANKS(ADDR_SURF_16_BANK));
   2821
   2822		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2823				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2824				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2825				NUM_BANKS(ADDR_SURF_16_BANK));
   2826
   2827		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2828				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2829				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2830				NUM_BANKS(ADDR_SURF_16_BANK));
   2831
   2832		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2835				NUM_BANKS(ADDR_SURF_16_BANK));
   2836
   2837		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2838				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2839				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2840				NUM_BANKS(ADDR_SURF_16_BANK));
   2841
   2842		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2843				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2844				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2845				NUM_BANKS(ADDR_SURF_16_BANK));
   2846
   2847		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2848				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2849				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2850				NUM_BANKS(ADDR_SURF_16_BANK));
   2851
   2852		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2855				NUM_BANKS(ADDR_SURF_8_BANK));
   2856
   2857		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2858				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2859				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2860				NUM_BANKS(ADDR_SURF_4_BANK));
   2861
   2862		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2863			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   2864
   2865		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2866			if (reg_offset != 7)
   2867				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   2868
   2869		break;
   2870	case CHIP_POLARIS10:
   2871		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2872				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2873				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   2874				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2875		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2876				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2877				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   2878				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2879		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2880				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2881				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   2882				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2883		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2884				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2885				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   2886				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2887		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2888				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2889				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2890				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2891		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2892				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2893				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2894				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2895		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2896				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2897				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2898				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2899		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2900				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2901				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   2902				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2903		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2904				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
   2905		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2906				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2907				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2908				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2909		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2910				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2911				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2912				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2913		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2914				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2915				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2916				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2917		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2918				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2919				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2920				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2921		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2922				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2923				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2924				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2925		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2926				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2927				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2928				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2929		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   2930				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2931				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2932				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2933		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2934				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2935				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2936				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2937		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2938				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2939				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2940				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2941		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2942				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2943				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2944				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2945		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   2946				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2947				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2948				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2949		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2950				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2951				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2952				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2953		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   2954				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2955				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2956				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2957		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2958				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2959				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2960				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2961		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   2962				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2963				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2964				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2965		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   2966				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2967				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2968				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2969		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   2970				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2971				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2972				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2973		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   2974				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2975				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   2976				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   2977		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2978				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2979				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2980				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2981		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2982				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2983				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2984				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2985		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2986				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2987				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2988				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2989		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2990				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2991				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2992				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   2993
   2994		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2995				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2996				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2997				NUM_BANKS(ADDR_SURF_16_BANK));
   2998
   2999		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3002				NUM_BANKS(ADDR_SURF_16_BANK));
   3003
   3004		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3007				NUM_BANKS(ADDR_SURF_16_BANK));
   3008
   3009		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3010				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3011				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3012				NUM_BANKS(ADDR_SURF_16_BANK));
   3013
   3014		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3015				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3016				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3017				NUM_BANKS(ADDR_SURF_16_BANK));
   3018
   3019		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3020				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3021				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3022				NUM_BANKS(ADDR_SURF_16_BANK));
   3023
   3024		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3025				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3026				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3027				NUM_BANKS(ADDR_SURF_16_BANK));
   3028
   3029		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3030				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   3031				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3032				NUM_BANKS(ADDR_SURF_16_BANK));
   3033
   3034		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3035				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3036				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3037				NUM_BANKS(ADDR_SURF_16_BANK));
   3038
   3039		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3040				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3041				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3042				NUM_BANKS(ADDR_SURF_16_BANK));
   3043
   3044		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3045				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3046				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3047				NUM_BANKS(ADDR_SURF_16_BANK));
   3048
   3049		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3050				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3051				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3052				NUM_BANKS(ADDR_SURF_8_BANK));
   3053
   3054		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3055				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3056				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3057				NUM_BANKS(ADDR_SURF_4_BANK));
   3058
   3059		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3060				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3061				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   3062				NUM_BANKS(ADDR_SURF_4_BANK));
   3063
   3064		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   3065			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   3066
   3067		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   3068			if (reg_offset != 7)
   3069				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   3070
   3071		break;
   3072	case CHIP_STONEY:
   3073		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3074				PIPE_CONFIG(ADDR_SURF_P2) |
   3075				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   3076				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3077		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3078				PIPE_CONFIG(ADDR_SURF_P2) |
   3079				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   3080				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3081		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3082				PIPE_CONFIG(ADDR_SURF_P2) |
   3083				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   3084				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3085		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3086				PIPE_CONFIG(ADDR_SURF_P2) |
   3087				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   3088				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3089		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3090				PIPE_CONFIG(ADDR_SURF_P2) |
   3091				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   3092				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3093		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3094				PIPE_CONFIG(ADDR_SURF_P2) |
   3095				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   3096				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3097		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3098				PIPE_CONFIG(ADDR_SURF_P2) |
   3099				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   3100				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3101		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   3102				PIPE_CONFIG(ADDR_SURF_P2));
   3103		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3104				PIPE_CONFIG(ADDR_SURF_P2) |
   3105				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3106				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3107		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3108				 PIPE_CONFIG(ADDR_SURF_P2) |
   3109				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3110				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3111		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3112				 PIPE_CONFIG(ADDR_SURF_P2) |
   3113				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3114				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   3115		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3116				 PIPE_CONFIG(ADDR_SURF_P2) |
   3117				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3118				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3119		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3120				 PIPE_CONFIG(ADDR_SURF_P2) |
   3121				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3122				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3123		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   3124				 PIPE_CONFIG(ADDR_SURF_P2) |
   3125				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3126				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3127		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3128				 PIPE_CONFIG(ADDR_SURF_P2) |
   3129				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3130				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   3131		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   3132				 PIPE_CONFIG(ADDR_SURF_P2) |
   3133				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3134				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3135		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   3136				 PIPE_CONFIG(ADDR_SURF_P2) |
   3137				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3138				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3139		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   3140				 PIPE_CONFIG(ADDR_SURF_P2) |
   3141				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3142				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3143		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   3144				 PIPE_CONFIG(ADDR_SURF_P2) |
   3145				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3146				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3147		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   3148				 PIPE_CONFIG(ADDR_SURF_P2) |
   3149				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3150				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3151		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   3152				 PIPE_CONFIG(ADDR_SURF_P2) |
   3153				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3154				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3155		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   3156				 PIPE_CONFIG(ADDR_SURF_P2) |
   3157				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3158				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3159		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   3160				 PIPE_CONFIG(ADDR_SURF_P2) |
   3161				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3162				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3163		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3164				 PIPE_CONFIG(ADDR_SURF_P2) |
   3165				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3166				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3167		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3168				 PIPE_CONFIG(ADDR_SURF_P2) |
   3169				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3170				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3171		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3172				 PIPE_CONFIG(ADDR_SURF_P2) |
   3173				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3174				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   3175
   3176		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3177				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3178				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3179				NUM_BANKS(ADDR_SURF_8_BANK));
   3180		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3181				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3182				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3183				NUM_BANKS(ADDR_SURF_8_BANK));
   3184		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3185				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3186				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3187				NUM_BANKS(ADDR_SURF_8_BANK));
   3188		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3189				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3190				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3191				NUM_BANKS(ADDR_SURF_8_BANK));
   3192		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3193				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3194				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3195				NUM_BANKS(ADDR_SURF_8_BANK));
   3196		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3197				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3198				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3199				NUM_BANKS(ADDR_SURF_8_BANK));
   3200		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3201				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3202				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3203				NUM_BANKS(ADDR_SURF_8_BANK));
   3204		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   3205				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   3206				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3207				NUM_BANKS(ADDR_SURF_16_BANK));
   3208		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   3209				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3210				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3211				NUM_BANKS(ADDR_SURF_16_BANK));
   3212		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3213				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3214				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3215				 NUM_BANKS(ADDR_SURF_16_BANK));
   3216		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3217				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3218				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3219				 NUM_BANKS(ADDR_SURF_16_BANK));
   3220		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3221				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3222				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3223				 NUM_BANKS(ADDR_SURF_16_BANK));
   3224		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3225				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3226				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3227				 NUM_BANKS(ADDR_SURF_16_BANK));
   3228		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3229				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3230				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3231				 NUM_BANKS(ADDR_SURF_8_BANK));
   3232
   3233		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   3234			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
   3235			    reg_offset != 23)
   3236				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   3237
   3238		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   3239			if (reg_offset != 7)
   3240				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   3241
   3242		break;
   3243	default:
   3244		dev_warn(adev->dev,
   3245			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
   3246			 adev->asic_type);
   3247		fallthrough;
   3248
   3249	case CHIP_CARRIZO:
   3250		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3251				PIPE_CONFIG(ADDR_SURF_P2) |
   3252				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   3253				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3254		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3255				PIPE_CONFIG(ADDR_SURF_P2) |
   3256				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   3257				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3258		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3259				PIPE_CONFIG(ADDR_SURF_P2) |
   3260				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   3261				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3262		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3263				PIPE_CONFIG(ADDR_SURF_P2) |
   3264				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   3265				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3266		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3267				PIPE_CONFIG(ADDR_SURF_P2) |
   3268				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   3269				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3270		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3271				PIPE_CONFIG(ADDR_SURF_P2) |
   3272				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   3273				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3274		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3275				PIPE_CONFIG(ADDR_SURF_P2) |
   3276				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
   3277				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   3278		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   3279				PIPE_CONFIG(ADDR_SURF_P2));
   3280		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3281				PIPE_CONFIG(ADDR_SURF_P2) |
   3282				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3283				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3284		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3285				 PIPE_CONFIG(ADDR_SURF_P2) |
   3286				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3287				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3288		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3289				 PIPE_CONFIG(ADDR_SURF_P2) |
   3290				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   3291				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   3292		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3293				 PIPE_CONFIG(ADDR_SURF_P2) |
   3294				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3295				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3296		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3297				 PIPE_CONFIG(ADDR_SURF_P2) |
   3298				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3299				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3300		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   3301				 PIPE_CONFIG(ADDR_SURF_P2) |
   3302				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3303				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3304		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3305				 PIPE_CONFIG(ADDR_SURF_P2) |
   3306				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3307				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   3308		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   3309				 PIPE_CONFIG(ADDR_SURF_P2) |
   3310				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3311				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3312		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   3313				 PIPE_CONFIG(ADDR_SURF_P2) |
   3314				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3315				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3316		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   3317				 PIPE_CONFIG(ADDR_SURF_P2) |
   3318				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3319				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3320		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   3321				 PIPE_CONFIG(ADDR_SURF_P2) |
   3322				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3323				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3324		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   3325				 PIPE_CONFIG(ADDR_SURF_P2) |
   3326				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3327				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3328		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   3329				 PIPE_CONFIG(ADDR_SURF_P2) |
   3330				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   3331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3332		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   3333				 PIPE_CONFIG(ADDR_SURF_P2) |
   3334				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3336		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   3337				 PIPE_CONFIG(ADDR_SURF_P2) |
   3338				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   3339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   3340		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   3341				 PIPE_CONFIG(ADDR_SURF_P2) |
   3342				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3344		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   3345				 PIPE_CONFIG(ADDR_SURF_P2) |
   3346				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   3348		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   3349				 PIPE_CONFIG(ADDR_SURF_P2) |
   3350				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   3351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   3352
   3353		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3354				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3355				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3356				NUM_BANKS(ADDR_SURF_8_BANK));
   3357		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3358				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3359				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3360				NUM_BANKS(ADDR_SURF_8_BANK));
   3361		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3362				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3363				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3364				NUM_BANKS(ADDR_SURF_8_BANK));
   3365		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3366				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3367				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3368				NUM_BANKS(ADDR_SURF_8_BANK));
   3369		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3370				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3371				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3372				NUM_BANKS(ADDR_SURF_8_BANK));
   3373		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3374				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3375				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3376				NUM_BANKS(ADDR_SURF_8_BANK));
   3377		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3378				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3379				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3380				NUM_BANKS(ADDR_SURF_8_BANK));
   3381		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   3382				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   3383				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3384				NUM_BANKS(ADDR_SURF_16_BANK));
   3385		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   3386				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3387				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3388				NUM_BANKS(ADDR_SURF_16_BANK));
   3389		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3390				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   3391				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3392				 NUM_BANKS(ADDR_SURF_16_BANK));
   3393		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   3394				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3395				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3396				 NUM_BANKS(ADDR_SURF_16_BANK));
   3397		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3398				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   3399				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3400				 NUM_BANKS(ADDR_SURF_16_BANK));
   3401		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3402				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3403				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   3404				 NUM_BANKS(ADDR_SURF_16_BANK));
   3405		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3406				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3407				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3408				 NUM_BANKS(ADDR_SURF_8_BANK));
   3409
   3410		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   3411			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
   3412			    reg_offset != 23)
   3413				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
   3414
   3415		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   3416			if (reg_offset != 7)
   3417				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
   3418
   3419		break;
   3420	}
   3421}
   3422
   3423static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
   3424				  u32 se_num, u32 sh_num, u32 instance)
   3425{
   3426	u32 data;
   3427
   3428	if (instance == 0xffffffff)
   3429		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
   3430	else
   3431		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
   3432
   3433	if (se_num == 0xffffffff)
   3434		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
   3435	else
   3436		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
   3437
   3438	if (sh_num == 0xffffffff)
   3439		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
   3440	else
   3441		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
   3442
   3443	WREG32(mmGRBM_GFX_INDEX, data);
   3444}
   3445
   3446static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
   3447				  u32 me, u32 pipe, u32 q, u32 vm)
   3448{
   3449	vi_srbm_select(adev, me, pipe, q, vm);
   3450}
   3451
   3452static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
   3453{
   3454	u32 data, mask;
   3455
   3456	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
   3457		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
   3458
   3459	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
   3460
   3461	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
   3462					 adev->gfx.config.max_sh_per_se);
   3463
   3464	return (~data) & mask;
   3465}
   3466
   3467static void
   3468gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
   3469{
   3470	switch (adev->asic_type) {
   3471	case CHIP_FIJI:
   3472	case CHIP_VEGAM:
   3473		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
   3474			  RB_XSEL2(1) | PKR_MAP(2) |
   3475			  PKR_XSEL(1) | PKR_YSEL(1) |
   3476			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
   3477		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
   3478			   SE_PAIR_YSEL(2);
   3479		break;
   3480	case CHIP_TONGA:
   3481	case CHIP_POLARIS10:
   3482		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
   3483			  SE_XSEL(1) | SE_YSEL(1);
   3484		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
   3485			   SE_PAIR_YSEL(2);
   3486		break;
   3487	case CHIP_TOPAZ:
   3488	case CHIP_CARRIZO:
   3489		*rconf |= RB_MAP_PKR0(2);
   3490		*rconf1 |= 0x0;
   3491		break;
   3492	case CHIP_POLARIS11:
   3493	case CHIP_POLARIS12:
   3494		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
   3495			  SE_XSEL(1) | SE_YSEL(1);
   3496		*rconf1 |= 0x0;
   3497		break;
   3498	case CHIP_STONEY:
   3499		*rconf |= 0x0;
   3500		*rconf1 |= 0x0;
   3501		break;
   3502	default:
   3503		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
   3504		break;
   3505	}
   3506}
   3507
   3508static void
   3509gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
   3510					u32 raster_config, u32 raster_config_1,
   3511					unsigned rb_mask, unsigned num_rb)
   3512{
   3513	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
   3514	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
   3515	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
   3516	unsigned rb_per_se = num_rb / num_se;
   3517	unsigned se_mask[4];
   3518	unsigned se;
   3519
   3520	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
   3521	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
   3522	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
   3523	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
   3524
   3525	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
   3526	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
   3527	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
   3528
   3529	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
   3530			     (!se_mask[2] && !se_mask[3]))) {
   3531		raster_config_1 &= ~SE_PAIR_MAP_MASK;
   3532
   3533		if (!se_mask[0] && !se_mask[1]) {
   3534			raster_config_1 |=
   3535				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
   3536		} else {
   3537			raster_config_1 |=
   3538				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
   3539		}
   3540	}
   3541
   3542	for (se = 0; se < num_se; se++) {
   3543		unsigned raster_config_se = raster_config;
   3544		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
   3545		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
   3546		int idx = (se / 2) * 2;
   3547
   3548		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
   3549			raster_config_se &= ~SE_MAP_MASK;
   3550
   3551			if (!se_mask[idx]) {
   3552				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
   3553			} else {
   3554				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
   3555			}
   3556		}
   3557
   3558		pkr0_mask &= rb_mask;
   3559		pkr1_mask &= rb_mask;
   3560		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
   3561			raster_config_se &= ~PKR_MAP_MASK;
   3562
   3563			if (!pkr0_mask) {
   3564				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
   3565			} else {
   3566				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
   3567			}
   3568		}
   3569
   3570		if (rb_per_se >= 2) {
   3571			unsigned rb0_mask = 1 << (se * rb_per_se);
   3572			unsigned rb1_mask = rb0_mask << 1;
   3573
   3574			rb0_mask &= rb_mask;
   3575			rb1_mask &= rb_mask;
   3576			if (!rb0_mask || !rb1_mask) {
   3577				raster_config_se &= ~RB_MAP_PKR0_MASK;
   3578
   3579				if (!rb0_mask) {
   3580					raster_config_se |=
   3581						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
   3582				} else {
   3583					raster_config_se |=
   3584						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
   3585				}
   3586			}
   3587
   3588			if (rb_per_se > 2) {
   3589				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
   3590				rb1_mask = rb0_mask << 1;
   3591				rb0_mask &= rb_mask;
   3592				rb1_mask &= rb_mask;
   3593				if (!rb0_mask || !rb1_mask) {
   3594					raster_config_se &= ~RB_MAP_PKR1_MASK;
   3595
   3596					if (!rb0_mask) {
   3597						raster_config_se |=
   3598							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
   3599					} else {
   3600						raster_config_se |=
   3601							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
   3602					}
   3603				}
   3604			}
   3605		}
   3606
   3607		/* GRBM_GFX_INDEX has a different offset on VI */
   3608		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
   3609		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
   3610		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
   3611	}
   3612
   3613	/* GRBM_GFX_INDEX has a different offset on VI */
   3614	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3615}
   3616
   3617static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
   3618{
   3619	int i, j;
   3620	u32 data;
   3621	u32 raster_config = 0, raster_config_1 = 0;
   3622	u32 active_rbs = 0;
   3623	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
   3624					adev->gfx.config.max_sh_per_se;
   3625	unsigned num_rb_pipes;
   3626
   3627	mutex_lock(&adev->grbm_idx_mutex);
   3628	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   3629		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   3630			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
   3631			data = gfx_v8_0_get_rb_active_bitmap(adev);
   3632			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
   3633					       rb_bitmap_width_per_sh);
   3634		}
   3635	}
   3636	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3637
   3638	adev->gfx.config.backend_enable_mask = active_rbs;
   3639	adev->gfx.config.num_rbs = hweight32(active_rbs);
   3640
   3641	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
   3642			     adev->gfx.config.max_shader_engines, 16);
   3643
   3644	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
   3645
   3646	if (!adev->gfx.config.backend_enable_mask ||
   3647			adev->gfx.config.num_rbs >= num_rb_pipes) {
   3648		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
   3649		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
   3650	} else {
   3651		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
   3652							adev->gfx.config.backend_enable_mask,
   3653							num_rb_pipes);
   3654	}
   3655
   3656	/* cache the values for userspace */
   3657	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   3658		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   3659			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
   3660			adev->gfx.config.rb_config[i][j].rb_backend_disable =
   3661				RREG32(mmCC_RB_BACKEND_DISABLE);
   3662			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
   3663				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
   3664			adev->gfx.config.rb_config[i][j].raster_config =
   3665				RREG32(mmPA_SC_RASTER_CONFIG);
   3666			adev->gfx.config.rb_config[i][j].raster_config_1 =
   3667				RREG32(mmPA_SC_RASTER_CONFIG_1);
   3668		}
   3669	}
   3670	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3671	mutex_unlock(&adev->grbm_idx_mutex);
   3672}
   3673
   3674#define DEFAULT_SH_MEM_BASES	(0x6000)
   3675/**
   3676 * gfx_v8_0_init_compute_vmid - gart enable
   3677 *
   3678 * @adev: amdgpu_device pointer
   3679 *
   3680 * Initialize compute vmid sh_mem registers
   3681 *
   3682 */
   3683static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
   3684{
   3685	int i;
   3686	uint32_t sh_mem_config;
   3687	uint32_t sh_mem_bases;
   3688
   3689	/*
   3690	 * Configure apertures:
   3691	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
   3692	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
   3693	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
   3694	 */
   3695	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
   3696
   3697	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
   3698			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
   3699			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
   3700			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
   3701			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
   3702			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
   3703
   3704	mutex_lock(&adev->srbm_mutex);
   3705	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   3706		vi_srbm_select(adev, 0, 0, 0, i);
   3707		/* CP and shaders */
   3708		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
   3709		WREG32(mmSH_MEM_APE1_BASE, 1);
   3710		WREG32(mmSH_MEM_APE1_LIMIT, 0);
   3711		WREG32(mmSH_MEM_BASES, sh_mem_bases);
   3712	}
   3713	vi_srbm_select(adev, 0, 0, 0, 0);
   3714	mutex_unlock(&adev->srbm_mutex);
   3715
   3716	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
   3717	   access. These should be enabled by FW for target VMIDs. */
   3718	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   3719		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
   3720		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
   3721		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
   3722		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
   3723	}
   3724}
   3725
   3726static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
   3727{
   3728	int vmid;
   3729
   3730	/*
   3731	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
   3732	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
   3733	 * the driver can enable them for graphics. VMID0 should maintain
   3734	 * access so that HWS firmware can save/restore entries.
   3735	 */
   3736	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
   3737		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
   3738		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
   3739		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
   3740		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
   3741	}
   3742}
   3743
   3744static void gfx_v8_0_config_init(struct amdgpu_device *adev)
   3745{
   3746	switch (adev->asic_type) {
   3747	default:
   3748		adev->gfx.config.double_offchip_lds_buf = 1;
   3749		break;
   3750	case CHIP_CARRIZO:
   3751	case CHIP_STONEY:
   3752		adev->gfx.config.double_offchip_lds_buf = 0;
   3753		break;
   3754	}
   3755}
   3756
   3757static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
   3758{
   3759	u32 tmp, sh_static_mem_cfg;
   3760	int i;
   3761
   3762	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
   3763	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
   3764	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
   3765	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
   3766
   3767	gfx_v8_0_tiling_mode_table_init(adev);
   3768	gfx_v8_0_setup_rb(adev);
   3769	gfx_v8_0_get_cu_info(adev);
   3770	gfx_v8_0_config_init(adev);
   3771
   3772	/* XXX SH_MEM regs */
   3773	/* where to put LDS, scratch, GPUVM in FSA64 space */
   3774	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
   3775				   SWIZZLE_ENABLE, 1);
   3776	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
   3777				   ELEMENT_SIZE, 1);
   3778	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
   3779				   INDEX_STRIDE, 3);
   3780	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
   3781
   3782	mutex_lock(&adev->srbm_mutex);
   3783	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
   3784		vi_srbm_select(adev, 0, 0, 0, i);
   3785		/* CP and shaders */
   3786		if (i == 0) {
   3787			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
   3788			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
   3789			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
   3790					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
   3791			WREG32(mmSH_MEM_CONFIG, tmp);
   3792			WREG32(mmSH_MEM_BASES, 0);
   3793		} else {
   3794			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
   3795			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
   3796			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
   3797					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
   3798			WREG32(mmSH_MEM_CONFIG, tmp);
   3799			tmp = adev->gmc.shared_aperture_start >> 48;
   3800			WREG32(mmSH_MEM_BASES, tmp);
   3801		}
   3802
   3803		WREG32(mmSH_MEM_APE1_BASE, 1);
   3804		WREG32(mmSH_MEM_APE1_LIMIT, 0);
   3805	}
   3806	vi_srbm_select(adev, 0, 0, 0, 0);
   3807	mutex_unlock(&adev->srbm_mutex);
   3808
   3809	gfx_v8_0_init_compute_vmid(adev);
   3810	gfx_v8_0_init_gds_vmid(adev);
   3811
   3812	mutex_lock(&adev->grbm_idx_mutex);
   3813	/*
   3814	 * making sure that the following register writes will be broadcasted
   3815	 * to all the shaders
   3816	 */
   3817	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3818
   3819	WREG32(mmPA_SC_FIFO_SIZE,
   3820		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
   3821			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
   3822		   (adev->gfx.config.sc_prim_fifo_size_backend <<
   3823			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
   3824		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
   3825			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
   3826		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
   3827			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
   3828
   3829	tmp = RREG32(mmSPI_ARB_PRIORITY);
   3830	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
   3831	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
   3832	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
   3833	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
   3834	WREG32(mmSPI_ARB_PRIORITY, tmp);
   3835
   3836	mutex_unlock(&adev->grbm_idx_mutex);
   3837
   3838}
   3839
   3840static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
   3841{
   3842	u32 i, j, k;
   3843	u32 mask;
   3844
   3845	mutex_lock(&adev->grbm_idx_mutex);
   3846	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   3847		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   3848			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
   3849			for (k = 0; k < adev->usec_timeout; k++) {
   3850				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
   3851					break;
   3852				udelay(1);
   3853			}
   3854			if (k == adev->usec_timeout) {
   3855				gfx_v8_0_select_se_sh(adev, 0xffffffff,
   3856						      0xffffffff, 0xffffffff);
   3857				mutex_unlock(&adev->grbm_idx_mutex);
   3858				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
   3859					 i, j);
   3860				return;
   3861			}
   3862		}
   3863	}
   3864	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3865	mutex_unlock(&adev->grbm_idx_mutex);
   3866
   3867	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
   3868		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
   3869		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
   3870		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
   3871	for (k = 0; k < adev->usec_timeout; k++) {
   3872		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
   3873			break;
   3874		udelay(1);
   3875	}
   3876}
   3877
   3878static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
   3879					       bool enable)
   3880{
   3881	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
   3882
   3883	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
   3884	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
   3885	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
   3886	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
   3887
   3888	WREG32(mmCP_INT_CNTL_RING0, tmp);
   3889}
   3890
   3891static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
   3892{
   3893	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
   3894	/* csib */
   3895	WREG32(mmRLC_CSIB_ADDR_HI,
   3896			adev->gfx.rlc.clear_state_gpu_addr >> 32);
   3897	WREG32(mmRLC_CSIB_ADDR_LO,
   3898			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
   3899	WREG32(mmRLC_CSIB_LENGTH,
   3900			adev->gfx.rlc.clear_state_size);
   3901}
   3902
   3903static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
   3904				int ind_offset,
   3905				int list_size,
   3906				int *unique_indices,
   3907				int *indices_count,
   3908				int max_indices,
   3909				int *ind_start_offsets,
   3910				int *offset_count,
   3911				int max_offset)
   3912{
   3913	int indices;
   3914	bool new_entry = true;
   3915
   3916	for (; ind_offset < list_size; ind_offset++) {
   3917
   3918		if (new_entry) {
   3919			new_entry = false;
   3920			ind_start_offsets[*offset_count] = ind_offset;
   3921			*offset_count = *offset_count + 1;
   3922			BUG_ON(*offset_count >= max_offset);
   3923		}
   3924
   3925		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
   3926			new_entry = true;
   3927			continue;
   3928		}
   3929
   3930		ind_offset += 2;
   3931
   3932		/* look for the matching indice */
   3933		for (indices = 0;
   3934			indices < *indices_count;
   3935			indices++) {
   3936			if (unique_indices[indices] ==
   3937				register_list_format[ind_offset])
   3938				break;
   3939		}
   3940
   3941		if (indices >= *indices_count) {
   3942			unique_indices[*indices_count] =
   3943				register_list_format[ind_offset];
   3944			indices = *indices_count;
   3945			*indices_count = *indices_count + 1;
   3946			BUG_ON(*indices_count >= max_indices);
   3947		}
   3948
   3949		register_list_format[ind_offset] = indices;
   3950	}
   3951}
   3952
   3953static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
   3954{
   3955	int i, temp, data;
   3956	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
   3957	int indices_count = 0;
   3958	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   3959	int offset_count = 0;
   3960
   3961	int list_size;
   3962	unsigned int *register_list_format =
   3963		kmemdup(adev->gfx.rlc.register_list_format,
   3964			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
   3965	if (!register_list_format)
   3966		return -ENOMEM;
   3967
   3968	gfx_v8_0_parse_ind_reg_list(register_list_format,
   3969				RLC_FormatDirectRegListLength,
   3970				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
   3971				unique_indices,
   3972				&indices_count,
   3973				ARRAY_SIZE(unique_indices),
   3974				indirect_start_offsets,
   3975				&offset_count,
   3976				ARRAY_SIZE(indirect_start_offsets));
   3977
   3978	/* save and restore list */
   3979	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
   3980
   3981	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
   3982	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
   3983		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
   3984
   3985	/* indirect list */
   3986	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
   3987	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
   3988		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
   3989
   3990	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
   3991	list_size = list_size >> 1;
   3992	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
   3993	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
   3994
   3995	/* starting offsets starts */
   3996	WREG32(mmRLC_GPM_SCRATCH_ADDR,
   3997		adev->gfx.rlc.starting_offsets_start);
   3998	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
   3999		WREG32(mmRLC_GPM_SCRATCH_DATA,
   4000				indirect_start_offsets[i]);
   4001
   4002	/* unique indices */
   4003	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
   4004	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
   4005	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
   4006		if (unique_indices[i] != 0) {
   4007			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
   4008			WREG32(data + i, unique_indices[i] >> 20);
   4009		}
   4010	}
   4011	kfree(register_list_format);
   4012
   4013	return 0;
   4014}
   4015
   4016static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
   4017{
   4018	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
   4019}
   4020
   4021static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
   4022{
   4023	uint32_t data;
   4024
   4025	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
   4026
   4027	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
   4028	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
   4029	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
   4030	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
   4031	WREG32(mmRLC_PG_DELAY, data);
   4032
   4033	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
   4034	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
   4035
   4036}
   4037
   4038static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
   4039						bool enable)
   4040{
   4041	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
   4042}
   4043
   4044static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
   4045						  bool enable)
   4046{
   4047	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
   4048}
   4049
   4050static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
   4051{
   4052	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
   4053}
   4054
   4055static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
   4056{
   4057	if ((adev->asic_type == CHIP_CARRIZO) ||
   4058	    (adev->asic_type == CHIP_STONEY)) {
   4059		gfx_v8_0_init_csb(adev);
   4060		gfx_v8_0_init_save_restore_list(adev);
   4061		gfx_v8_0_enable_save_restore_machine(adev);
   4062		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
   4063		gfx_v8_0_init_power_gating(adev);
   4064		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
   4065	} else if ((adev->asic_type == CHIP_POLARIS11) ||
   4066		   (adev->asic_type == CHIP_POLARIS12) ||
   4067		   (adev->asic_type == CHIP_VEGAM)) {
   4068		gfx_v8_0_init_csb(adev);
   4069		gfx_v8_0_init_save_restore_list(adev);
   4070		gfx_v8_0_enable_save_restore_machine(adev);
   4071		gfx_v8_0_init_power_gating(adev);
   4072	}
   4073
   4074}
   4075
   4076static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
   4077{
   4078	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
   4079
   4080	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
   4081	gfx_v8_0_wait_for_rlc_serdes(adev);
   4082}
   4083
   4084static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
   4085{
   4086	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
   4087	udelay(50);
   4088
   4089	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
   4090	udelay(50);
   4091}
   4092
   4093static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
   4094{
   4095	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
   4096
   4097	/* carrizo do enable cp interrupt after cp inited */
   4098	if (!(adev->flags & AMD_IS_APU))
   4099		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
   4100
   4101	udelay(50);
   4102}
   4103
   4104static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
   4105{
   4106	if (amdgpu_sriov_vf(adev)) {
   4107		gfx_v8_0_init_csb(adev);
   4108		return 0;
   4109	}
   4110
   4111	adev->gfx.rlc.funcs->stop(adev);
   4112	adev->gfx.rlc.funcs->reset(adev);
   4113	gfx_v8_0_init_pg(adev);
   4114	adev->gfx.rlc.funcs->start(adev);
   4115
   4116	return 0;
   4117}
   4118
   4119static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
   4120{
   4121	u32 tmp = RREG32(mmCP_ME_CNTL);
   4122
   4123	if (enable) {
   4124		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
   4125		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
   4126		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
   4127	} else {
   4128		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
   4129		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
   4130		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
   4131	}
   4132	WREG32(mmCP_ME_CNTL, tmp);
   4133	udelay(50);
   4134}
   4135
   4136static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
   4137{
   4138	u32 count = 0;
   4139	const struct cs_section_def *sect = NULL;
   4140	const struct cs_extent_def *ext = NULL;
   4141
   4142	/* begin clear state */
   4143	count += 2;
   4144	/* context control state */
   4145	count += 3;
   4146
   4147	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
   4148		for (ext = sect->section; ext->extent != NULL; ++ext) {
   4149			if (sect->id == SECT_CONTEXT)
   4150				count += 2 + ext->reg_count;
   4151			else
   4152				return 0;
   4153		}
   4154	}
   4155	/* pa_sc_raster_config/pa_sc_raster_config1 */
   4156	count += 4;
   4157	/* end clear state */
   4158	count += 2;
   4159	/* clear state */
   4160	count += 2;
   4161
   4162	return count;
   4163}
   4164
   4165static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
   4166{
   4167	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
   4168	const struct cs_section_def *sect = NULL;
   4169	const struct cs_extent_def *ext = NULL;
   4170	int r, i;
   4171
   4172	/* init the CP */
   4173	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
   4174	WREG32(mmCP_ENDIAN_SWAP, 0);
   4175	WREG32(mmCP_DEVICE_ID, 1);
   4176
   4177	gfx_v8_0_cp_gfx_enable(adev, true);
   4178
   4179	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
   4180	if (r) {
   4181		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
   4182		return r;
   4183	}
   4184
   4185	/* clear state buffer */
   4186	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   4187	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   4188
   4189	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   4190	amdgpu_ring_write(ring, 0x80000000);
   4191	amdgpu_ring_write(ring, 0x80000000);
   4192
   4193	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
   4194		for (ext = sect->section; ext->extent != NULL; ++ext) {
   4195			if (sect->id == SECT_CONTEXT) {
   4196				amdgpu_ring_write(ring,
   4197				       PACKET3(PACKET3_SET_CONTEXT_REG,
   4198					       ext->reg_count));
   4199				amdgpu_ring_write(ring,
   4200				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
   4201				for (i = 0; i < ext->reg_count; i++)
   4202					amdgpu_ring_write(ring, ext->extent[i]);
   4203			}
   4204		}
   4205	}
   4206
   4207	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   4208	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
   4209	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
   4210	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
   4211
   4212	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   4213	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
   4214
   4215	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
   4216	amdgpu_ring_write(ring, 0);
   4217
   4218	/* init the CE partitions */
   4219	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
   4220	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
   4221	amdgpu_ring_write(ring, 0x8000);
   4222	amdgpu_ring_write(ring, 0x8000);
   4223
   4224	amdgpu_ring_commit(ring);
   4225
   4226	return 0;
   4227}
   4228static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
   4229{
   4230	u32 tmp;
   4231	/* no gfx doorbells on iceland */
   4232	if (adev->asic_type == CHIP_TOPAZ)
   4233		return;
   4234
   4235	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
   4236
   4237	if (ring->use_doorbell) {
   4238		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   4239				DOORBELL_OFFSET, ring->doorbell_index);
   4240		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   4241						DOORBELL_HIT, 0);
   4242		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
   4243					    DOORBELL_EN, 1);
   4244	} else {
   4245		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
   4246	}
   4247
   4248	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
   4249
   4250	if (adev->flags & AMD_IS_APU)
   4251		return;
   4252
   4253	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
   4254					DOORBELL_RANGE_LOWER,
   4255					adev->doorbell_index.gfx_ring0);
   4256	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
   4257
   4258	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
   4259		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
   4260}
   4261
   4262static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
   4263{
   4264	struct amdgpu_ring *ring;
   4265	u32 tmp;
   4266	u32 rb_bufsz;
   4267	u64 rb_addr, rptr_addr, wptr_gpu_addr;
   4268
   4269	/* Set the write pointer delay */
   4270	WREG32(mmCP_RB_WPTR_DELAY, 0);
   4271
   4272	/* set the RB to use vmid 0 */
   4273	WREG32(mmCP_RB_VMID, 0);
   4274
   4275	/* Set ring buffer size */
   4276	ring = &adev->gfx.gfx_ring[0];
   4277	rb_bufsz = order_base_2(ring->ring_size / 8);
   4278	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
   4279	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
   4280	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
   4281	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
   4282#ifdef __BIG_ENDIAN
   4283	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
   4284#endif
   4285	WREG32(mmCP_RB0_CNTL, tmp);
   4286
   4287	/* Initialize the ring buffer's read and write pointers */
   4288	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
   4289	ring->wptr = 0;
   4290	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
   4291
   4292	/* set the wb address wether it's enabled or not */
   4293	rptr_addr = ring->rptr_gpu_addr;
   4294	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
   4295	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
   4296
   4297	wptr_gpu_addr = ring->wptr_gpu_addr;
   4298	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
   4299	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
   4300	mdelay(1);
   4301	WREG32(mmCP_RB0_CNTL, tmp);
   4302
   4303	rb_addr = ring->gpu_addr >> 8;
   4304	WREG32(mmCP_RB0_BASE, rb_addr);
   4305	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
   4306
   4307	gfx_v8_0_set_cpg_door_bell(adev, ring);
   4308	/* start the ring */
   4309	amdgpu_ring_clear_ring(ring);
   4310	gfx_v8_0_cp_gfx_start(adev);
   4311	ring->sched.ready = true;
   4312
   4313	return 0;
   4314}
   4315
   4316static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
   4317{
   4318	if (enable) {
   4319		WREG32(mmCP_MEC_CNTL, 0);
   4320	} else {
   4321		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
   4322		adev->gfx.kiq.ring.sched.ready = false;
   4323	}
   4324	udelay(50);
   4325}
   4326
   4327/* KIQ functions */
   4328static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
   4329{
   4330	uint32_t tmp;
   4331	struct amdgpu_device *adev = ring->adev;
   4332
   4333	/* tell RLC which is KIQ queue */
   4334	tmp = RREG32(mmRLC_CP_SCHEDULERS);
   4335	tmp &= 0xffffff00;
   4336	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
   4337	WREG32(mmRLC_CP_SCHEDULERS, tmp);
   4338	tmp |= 0x80;
   4339	WREG32(mmRLC_CP_SCHEDULERS, tmp);
   4340}
   4341
   4342static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
   4343{
   4344	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
   4345	uint64_t queue_mask = 0;
   4346	int r, i;
   4347
   4348	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
   4349		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
   4350			continue;
   4351
   4352		/* This situation may be hit in the future if a new HW
   4353		 * generation exposes more than 64 queues. If so, the
   4354		 * definition of queue_mask needs updating */
   4355		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
   4356			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
   4357			break;
   4358		}
   4359
   4360		queue_mask |= (1ull << i);
   4361	}
   4362
   4363	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
   4364	if (r) {
   4365		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
   4366		return r;
   4367	}
   4368	/* set resources */
   4369	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
   4370	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
   4371	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
   4372	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
   4373	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
   4374	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
   4375	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
   4376	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
   4377	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4378		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
   4379		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
   4380		uint64_t wptr_addr = ring->wptr_gpu_addr;
   4381
   4382		/* map queues */
   4383		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
   4384		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
   4385		amdgpu_ring_write(kiq_ring,
   4386				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
   4387		amdgpu_ring_write(kiq_ring,
   4388				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
   4389				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
   4390				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
   4391				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
   4392		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
   4393		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
   4394		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
   4395		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
   4396	}
   4397
   4398	amdgpu_ring_commit(kiq_ring);
   4399
   4400	return 0;
   4401}
   4402
   4403static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
   4404{
   4405	int i, r = 0;
   4406
   4407	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
   4408		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
   4409		for (i = 0; i < adev->usec_timeout; i++) {
   4410			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
   4411				break;
   4412			udelay(1);
   4413		}
   4414		if (i == adev->usec_timeout)
   4415			r = -ETIMEDOUT;
   4416	}
   4417	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
   4418	WREG32(mmCP_HQD_PQ_RPTR, 0);
   4419	WREG32(mmCP_HQD_PQ_WPTR, 0);
   4420
   4421	return r;
   4422}
   4423
   4424static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
   4425{
   4426	struct amdgpu_device *adev = ring->adev;
   4427
   4428	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
   4429		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
   4430			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
   4431			mqd->cp_hqd_queue_priority =
   4432				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
   4433		}
   4434	}
   4435}
   4436
   4437static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
   4438{
   4439	struct amdgpu_device *adev = ring->adev;
   4440	struct vi_mqd *mqd = ring->mqd_ptr;
   4441	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
   4442	uint32_t tmp;
   4443
   4444	mqd->header = 0xC0310800;
   4445	mqd->compute_pipelinestat_enable = 0x00000001;
   4446	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
   4447	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
   4448	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
   4449	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
   4450	mqd->compute_misc_reserved = 0x00000003;
   4451	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
   4452						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
   4453	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
   4454						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
   4455	eop_base_addr = ring->eop_gpu_addr >> 8;
   4456	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
   4457	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
   4458
   4459	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   4460	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
   4461	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
   4462			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
   4463
   4464	mqd->cp_hqd_eop_control = tmp;
   4465
   4466	/* enable doorbell? */
   4467	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
   4468			    CP_HQD_PQ_DOORBELL_CONTROL,
   4469			    DOORBELL_EN,
   4470			    ring->use_doorbell ? 1 : 0);
   4471
   4472	mqd->cp_hqd_pq_doorbell_control = tmp;
   4473
   4474	/* set the pointer to the MQD */
   4475	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
   4476	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
   4477
   4478	/* set MQD vmid to 0 */
   4479	tmp = RREG32(mmCP_MQD_CONTROL);
   4480	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
   4481	mqd->cp_mqd_control = tmp;
   4482
   4483	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
   4484	hqd_gpu_addr = ring->gpu_addr >> 8;
   4485	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
   4486	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
   4487
   4488	/* set up the HQD, this is similar to CP_RB0_CNTL */
   4489	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
   4490	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
   4491			    (order_base_2(ring->ring_size / 4) - 1));
   4492	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
   4493			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
   4494#ifdef __BIG_ENDIAN
   4495	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
   4496#endif
   4497	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
   4498	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
   4499	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
   4500	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
   4501	mqd->cp_hqd_pq_control = tmp;
   4502
   4503	/* set the wb address whether it's enabled or not */
   4504	wb_gpu_addr = ring->rptr_gpu_addr;
   4505	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
   4506	mqd->cp_hqd_pq_rptr_report_addr_hi =
   4507		upper_32_bits(wb_gpu_addr) & 0xffff;
   4508
   4509	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
   4510	wb_gpu_addr = ring->wptr_gpu_addr;
   4511	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
   4512	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   4513
   4514	tmp = 0;
   4515	/* enable the doorbell if requested */
   4516	if (ring->use_doorbell) {
   4517		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
   4518		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4519				DOORBELL_OFFSET, ring->doorbell_index);
   4520
   4521		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4522					 DOORBELL_EN, 1);
   4523		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4524					 DOORBELL_SOURCE, 0);
   4525		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
   4526					 DOORBELL_HIT, 0);
   4527	}
   4528
   4529	mqd->cp_hqd_pq_doorbell_control = tmp;
   4530
   4531	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   4532	ring->wptr = 0;
   4533	mqd->cp_hqd_pq_wptr = ring->wptr;
   4534	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
   4535
   4536	/* set the vmid for the queue */
   4537	mqd->cp_hqd_vmid = 0;
   4538
   4539	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
   4540	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
   4541	mqd->cp_hqd_persistent_state = tmp;
   4542
   4543	/* set MTYPE */
   4544	tmp = RREG32(mmCP_HQD_IB_CONTROL);
   4545	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
   4546	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
   4547	mqd->cp_hqd_ib_control = tmp;
   4548
   4549	tmp = RREG32(mmCP_HQD_IQ_TIMER);
   4550	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
   4551	mqd->cp_hqd_iq_timer = tmp;
   4552
   4553	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
   4554	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
   4555	mqd->cp_hqd_ctx_save_control = tmp;
   4556
   4557	/* defaults */
   4558	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
   4559	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
   4560	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
   4561	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
   4562	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
   4563	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
   4564	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
   4565	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
   4566	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
   4567	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
   4568	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
   4569	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
   4570
   4571	/* set static priority for a queue/ring */
   4572	gfx_v8_0_mqd_set_priority(ring, mqd);
   4573	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
   4574
   4575	/* map_queues packet doesn't need activate the queue,
   4576	 * so only kiq need set this field.
   4577	 */
   4578	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
   4579		mqd->cp_hqd_active = 1;
   4580
   4581	return 0;
   4582}
   4583
   4584static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
   4585			struct vi_mqd *mqd)
   4586{
   4587	uint32_t mqd_reg;
   4588	uint32_t *mqd_data;
   4589
   4590	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
   4591	mqd_data = &mqd->cp_mqd_base_addr_lo;
   4592
   4593	/* disable wptr polling */
   4594	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
   4595
   4596	/* program all HQD registers */
   4597	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
   4598		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
   4599
   4600	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
   4601	 * This is safe since EOP RPTR==WPTR for any inactive HQD
   4602	 * on ASICs that do not support context-save.
   4603	 * EOP writes/reads can start anywhere in the ring.
   4604	 */
   4605	if (adev->asic_type != CHIP_TONGA) {
   4606		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
   4607		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
   4608		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
   4609	}
   4610
   4611	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
   4612		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
   4613
   4614	/* activate the HQD */
   4615	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
   4616		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
   4617
   4618	return 0;
   4619}
   4620
   4621static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
   4622{
   4623	struct amdgpu_device *adev = ring->adev;
   4624	struct vi_mqd *mqd = ring->mqd_ptr;
   4625	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
   4626
   4627	gfx_v8_0_kiq_setting(ring);
   4628
   4629	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
   4630		/* reset MQD to a clean status */
   4631		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4632			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
   4633
   4634		/* reset ring buffer */
   4635		ring->wptr = 0;
   4636		amdgpu_ring_clear_ring(ring);
   4637		mutex_lock(&adev->srbm_mutex);
   4638		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   4639		gfx_v8_0_mqd_commit(adev, mqd);
   4640		vi_srbm_select(adev, 0, 0, 0, 0);
   4641		mutex_unlock(&adev->srbm_mutex);
   4642	} else {
   4643		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
   4644		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
   4645		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
   4646		mutex_lock(&adev->srbm_mutex);
   4647		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   4648		gfx_v8_0_mqd_init(ring);
   4649		gfx_v8_0_mqd_commit(adev, mqd);
   4650		vi_srbm_select(adev, 0, 0, 0, 0);
   4651		mutex_unlock(&adev->srbm_mutex);
   4652
   4653		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4654			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
   4655	}
   4656
   4657	return 0;
   4658}
   4659
   4660static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
   4661{
   4662	struct amdgpu_device *adev = ring->adev;
   4663	struct vi_mqd *mqd = ring->mqd_ptr;
   4664	int mqd_idx = ring - &adev->gfx.compute_ring[0];
   4665
   4666	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
   4667		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
   4668		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
   4669		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
   4670		mutex_lock(&adev->srbm_mutex);
   4671		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   4672		gfx_v8_0_mqd_init(ring);
   4673		vi_srbm_select(adev, 0, 0, 0, 0);
   4674		mutex_unlock(&adev->srbm_mutex);
   4675
   4676		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4677			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
   4678	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
   4679		/* reset MQD to a clean status */
   4680		if (adev->gfx.mec.mqd_backup[mqd_idx])
   4681			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
   4682		/* reset ring buffer */
   4683		ring->wptr = 0;
   4684		amdgpu_ring_clear_ring(ring);
   4685	} else {
   4686		amdgpu_ring_clear_ring(ring);
   4687	}
   4688	return 0;
   4689}
   4690
   4691static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
   4692{
   4693	if (adev->asic_type > CHIP_TONGA) {
   4694		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
   4695		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
   4696	}
   4697	/* enable doorbells */
   4698	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
   4699}
   4700
   4701static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
   4702{
   4703	struct amdgpu_ring *ring;
   4704	int r;
   4705
   4706	ring = &adev->gfx.kiq.ring;
   4707
   4708	r = amdgpu_bo_reserve(ring->mqd_obj, false);
   4709	if (unlikely(r != 0))
   4710		return r;
   4711
   4712	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
   4713	if (unlikely(r != 0))
   4714		return r;
   4715
   4716	gfx_v8_0_kiq_init_queue(ring);
   4717	amdgpu_bo_kunmap(ring->mqd_obj);
   4718	ring->mqd_ptr = NULL;
   4719	amdgpu_bo_unreserve(ring->mqd_obj);
   4720	ring->sched.ready = true;
   4721	return 0;
   4722}
   4723
   4724static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
   4725{
   4726	struct amdgpu_ring *ring = NULL;
   4727	int r = 0, i;
   4728
   4729	gfx_v8_0_cp_compute_enable(adev, true);
   4730
   4731	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4732		ring = &adev->gfx.compute_ring[i];
   4733
   4734		r = amdgpu_bo_reserve(ring->mqd_obj, false);
   4735		if (unlikely(r != 0))
   4736			goto done;
   4737		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
   4738		if (!r) {
   4739			r = gfx_v8_0_kcq_init_queue(ring);
   4740			amdgpu_bo_kunmap(ring->mqd_obj);
   4741			ring->mqd_ptr = NULL;
   4742		}
   4743		amdgpu_bo_unreserve(ring->mqd_obj);
   4744		if (r)
   4745			goto done;
   4746	}
   4747
   4748	gfx_v8_0_set_mec_doorbell_range(adev);
   4749
   4750	r = gfx_v8_0_kiq_kcq_enable(adev);
   4751	if (r)
   4752		goto done;
   4753
   4754done:
   4755	return r;
   4756}
   4757
   4758static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
   4759{
   4760	int r, i;
   4761	struct amdgpu_ring *ring;
   4762
   4763	/* collect all the ring_tests here, gfx, kiq, compute */
   4764	ring = &adev->gfx.gfx_ring[0];
   4765	r = amdgpu_ring_test_helper(ring);
   4766	if (r)
   4767		return r;
   4768
   4769	ring = &adev->gfx.kiq.ring;
   4770	r = amdgpu_ring_test_helper(ring);
   4771	if (r)
   4772		return r;
   4773
   4774	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4775		ring = &adev->gfx.compute_ring[i];
   4776		amdgpu_ring_test_helper(ring);
   4777	}
   4778
   4779	return 0;
   4780}
   4781
   4782static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
   4783{
   4784	int r;
   4785
   4786	if (!(adev->flags & AMD_IS_APU))
   4787		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
   4788
   4789	r = gfx_v8_0_kiq_resume(adev);
   4790	if (r)
   4791		return r;
   4792
   4793	r = gfx_v8_0_cp_gfx_resume(adev);
   4794	if (r)
   4795		return r;
   4796
   4797	r = gfx_v8_0_kcq_resume(adev);
   4798	if (r)
   4799		return r;
   4800
   4801	r = gfx_v8_0_cp_test_all_rings(adev);
   4802	if (r)
   4803		return r;
   4804
   4805	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
   4806
   4807	return 0;
   4808}
   4809
   /* Enable or disable both CP halves: the gfx CP first, then the compute CP. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);

	gfx_v8_0_cp_compute_enable(adev, enable);
}
   4815
   4816static int gfx_v8_0_hw_init(void *handle)
   4817{
   4818	int r;
   4819	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4820
   4821	gfx_v8_0_init_golden_registers(adev);
   4822	gfx_v8_0_constants_init(adev);
   4823
   4824	r = adev->gfx.rlc.funcs->resume(adev);
   4825	if (r)
   4826		return r;
   4827
   4828	r = gfx_v8_0_cp_resume(adev);
   4829
   4830	return r;
   4831}
   4832
   4833static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
   4834{
   4835	int r, i;
   4836	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
   4837
   4838	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
   4839	if (r)
   4840		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
   4841
   4842	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4843		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
   4844
   4845		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
   4846		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
   4847						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
   4848						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
   4849						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
   4850						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
   4851		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
   4852		amdgpu_ring_write(kiq_ring, 0);
   4853		amdgpu_ring_write(kiq_ring, 0);
   4854		amdgpu_ring_write(kiq_ring, 0);
   4855	}
   4856	r = amdgpu_ring_test_helper(kiq_ring);
   4857	if (r)
   4858		DRM_ERROR("KCQ disable failed\n");
   4859
   4860	return r;
   4861}
   4862
   4863static bool gfx_v8_0_is_idle(void *handle)
   4864{
   4865	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4866
   4867	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
   4868		|| RREG32(mmGRBM_STATUS2) != 0x8)
   4869		return false;
   4870	else
   4871		return true;
   4872}
   4873
   4874static bool gfx_v8_0_rlc_is_idle(void *handle)
   4875{
   4876	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4877
   4878	if (RREG32(mmGRBM_STATUS2) != 0x8)
   4879		return false;
   4880	else
   4881		return true;
   4882}
   4883
   4884static int gfx_v8_0_wait_for_rlc_idle(void *handle)
   4885{
   4886	unsigned int i;
   4887	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4888
   4889	for (i = 0; i < adev->usec_timeout; i++) {
   4890		if (gfx_v8_0_rlc_is_idle(handle))
   4891			return 0;
   4892
   4893		udelay(1);
   4894	}
   4895	return -ETIMEDOUT;
   4896}
   4897
   4898static int gfx_v8_0_wait_for_idle(void *handle)
   4899{
   4900	unsigned int i;
   4901	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4902
   4903	for (i = 0; i < adev->usec_timeout; i++) {
   4904		if (gfx_v8_0_is_idle(handle))
   4905			return 0;
   4906
   4907		udelay(1);
   4908	}
   4909	return -ETIMEDOUT;
   4910}
   4911
   4912static int gfx_v8_0_hw_fini(void *handle)
   4913{
   4914	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4915
   4916	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
   4917	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
   4918
   4919	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
   4920
   4921	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
   4922
   4923	/* disable KCQ to avoid CPC touch memory not valid anymore */
   4924	gfx_v8_0_kcq_disable(adev);
   4925
   4926	if (amdgpu_sriov_vf(adev)) {
   4927		pr_debug("For SRIOV client, shouldn't do anything.\n");
   4928		return 0;
   4929	}
   4930	amdgpu_gfx_rlc_enter_safe_mode(adev);
   4931	if (!gfx_v8_0_wait_for_idle(adev))
   4932		gfx_v8_0_cp_enable(adev, false);
   4933	else
   4934		pr_err("cp is busy, skip halt cp\n");
   4935	if (!gfx_v8_0_wait_for_rlc_idle(adev))
   4936		adev->gfx.rlc.funcs->stop(adev);
   4937	else
   4938		pr_err("rlc is busy, skip halt rlc\n");
   4939	amdgpu_gfx_rlc_exit_safe_mode(adev);
   4940
   4941	return 0;
   4942}
   4943
   /* Suspend is the same as hardware teardown: forwards to gfx_v8_0_hw_fini(). */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
   4948
   /* Resume is the same as hardware init: forwards to gfx_v8_0_hw_init(). */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
   4953
   4954static bool gfx_v8_0_check_soft_reset(void *handle)
   4955{
   4956	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4957	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
   4958	u32 tmp;
   4959
   4960	/* GRBM_STATUS */
   4961	tmp = RREG32(mmGRBM_STATUS);
   4962	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
   4963		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
   4964		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
   4965		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
   4966		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
   4967		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
   4968		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
   4969		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4970						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
   4971		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4972						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
   4973		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
   4974						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
   4975	}
   4976
   4977	/* GRBM_STATUS2 */
   4978	tmp = RREG32(mmGRBM_STATUS2);
   4979	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
   4980		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
   4981						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
   4982
   4983	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
   4984	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
   4985	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
   4986		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
   4987						SOFT_RESET_CPF, 1);
   4988		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
   4989						SOFT_RESET_CPC, 1);
   4990		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
   4991						SOFT_RESET_CPG, 1);
   4992		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
   4993						SOFT_RESET_GRBM, 1);
   4994	}
   4995
   4996	/* SRBM_STATUS */
   4997	tmp = RREG32(mmSRBM_STATUS);
   4998	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
   4999		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
   5000						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
   5001	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
   5002		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
   5003						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
   5004
   5005	if (grbm_soft_reset || srbm_soft_reset) {
   5006		adev->gfx.grbm_soft_reset = grbm_soft_reset;
   5007		adev->gfx.srbm_soft_reset = srbm_soft_reset;
   5008		return true;
   5009	} else {
   5010		adev->gfx.grbm_soft_reset = 0;
   5011		adev->gfx.srbm_soft_reset = 0;
   5012		return false;
   5013	}
   5014}
   5015
   5016static int gfx_v8_0_pre_soft_reset(void *handle)
   5017{
   5018	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5019	u32 grbm_soft_reset = 0;
   5020
   5021	if ((!adev->gfx.grbm_soft_reset) &&
   5022	    (!adev->gfx.srbm_soft_reset))
   5023		return 0;
   5024
   5025	grbm_soft_reset = adev->gfx.grbm_soft_reset;
   5026
   5027	/* stop the rlc */
   5028	adev->gfx.rlc.funcs->stop(adev);
   5029
   5030	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
   5031	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
   5032		/* Disable GFX parsing/prefetching */
   5033		gfx_v8_0_cp_gfx_enable(adev, false);
   5034
   5035	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
   5036	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
   5037	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
   5038	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
   5039		int i;
   5040
   5041		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   5042			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
   5043
   5044			mutex_lock(&adev->srbm_mutex);
   5045			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   5046			gfx_v8_0_deactivate_hqd(adev, 2);
   5047			vi_srbm_select(adev, 0, 0, 0, 0);
   5048			mutex_unlock(&adev->srbm_mutex);
   5049		}
   5050		/* Disable MEC parsing/prefetching */
   5051		gfx_v8_0_cp_compute_enable(adev, false);
   5052	}
   5053
   5054	return 0;
   5055}
   5056
   5057static int gfx_v8_0_soft_reset(void *handle)
   5058{
   5059	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5060	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
   5061	u32 tmp;
   5062
   5063	if ((!adev->gfx.grbm_soft_reset) &&
   5064	    (!adev->gfx.srbm_soft_reset))
   5065		return 0;
   5066
   5067	grbm_soft_reset = adev->gfx.grbm_soft_reset;
   5068	srbm_soft_reset = adev->gfx.srbm_soft_reset;
   5069
   5070	if (grbm_soft_reset || srbm_soft_reset) {
   5071		tmp = RREG32(mmGMCON_DEBUG);
   5072		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
   5073		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
   5074		WREG32(mmGMCON_DEBUG, tmp);
   5075		udelay(50);
   5076	}
   5077
   5078	if (grbm_soft_reset) {
   5079		tmp = RREG32(mmGRBM_SOFT_RESET);
   5080		tmp |= grbm_soft_reset;
   5081		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
   5082		WREG32(mmGRBM_SOFT_RESET, tmp);
   5083		tmp = RREG32(mmGRBM_SOFT_RESET);
   5084
   5085		udelay(50);
   5086
   5087		tmp &= ~grbm_soft_reset;
   5088		WREG32(mmGRBM_SOFT_RESET, tmp);
   5089		tmp = RREG32(mmGRBM_SOFT_RESET);
   5090	}
   5091
   5092	if (srbm_soft_reset) {
   5093		tmp = RREG32(mmSRBM_SOFT_RESET);
   5094		tmp |= srbm_soft_reset;
   5095		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
   5096		WREG32(mmSRBM_SOFT_RESET, tmp);
   5097		tmp = RREG32(mmSRBM_SOFT_RESET);
   5098
   5099		udelay(50);
   5100
   5101		tmp &= ~srbm_soft_reset;
   5102		WREG32(mmSRBM_SOFT_RESET, tmp);
   5103		tmp = RREG32(mmSRBM_SOFT_RESET);
   5104	}
   5105
   5106	if (grbm_soft_reset || srbm_soft_reset) {
   5107		tmp = RREG32(mmGMCON_DEBUG);
   5108		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
   5109		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
   5110		WREG32(mmGMCON_DEBUG, tmp);
   5111	}
   5112
   5113	/* Wait a little for things to settle down */
   5114	udelay(50);
   5115
   5116	return 0;
   5117}
   5118
   5119static int gfx_v8_0_post_soft_reset(void *handle)
   5120{
   5121	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5122	u32 grbm_soft_reset = 0;
   5123
   5124	if ((!adev->gfx.grbm_soft_reset) &&
   5125	    (!adev->gfx.srbm_soft_reset))
   5126		return 0;
   5127
   5128	grbm_soft_reset = adev->gfx.grbm_soft_reset;
   5129
   5130	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
   5131	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
   5132	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
   5133	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
   5134		int i;
   5135
   5136		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   5137			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
   5138
   5139			mutex_lock(&adev->srbm_mutex);
   5140			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   5141			gfx_v8_0_deactivate_hqd(adev, 2);
   5142			vi_srbm_select(adev, 0, 0, 0, 0);
   5143			mutex_unlock(&adev->srbm_mutex);
   5144		}
   5145		gfx_v8_0_kiq_resume(adev);
   5146		gfx_v8_0_kcq_resume(adev);
   5147	}
   5148
   5149	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
   5150	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
   5151		gfx_v8_0_cp_gfx_resume(adev);
   5152
   5153	gfx_v8_0_cp_test_all_rings(adev);
   5154
   5155	adev->gfx.rlc.funcs->start(adev);
   5156
   5157	return 0;
   5158}
   5159
   5160/**
   5161 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
   5162 *
   5163 * @adev: amdgpu_device pointer
   5164 *
   5165 * Fetches a GPU clock counter snapshot.
   5166 * Returns the 64 bit clock counter snapshot.
   5167 */
   5168static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
   5169{
   5170	uint64_t clock;
   5171
   5172	mutex_lock(&adev->gfx.gpu_clock_mutex);
   5173	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
   5174	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
   5175		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
   5176	mutex_unlock(&adev->gfx.gpu_clock_mutex);
   5177	return clock;
   5178}
   5179
   5180static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
   5181					  uint32_t vmid,
   5182					  uint32_t gds_base, uint32_t gds_size,
   5183					  uint32_t gws_base, uint32_t gws_size,
   5184					  uint32_t oa_base, uint32_t oa_size)
   5185{
   5186	/* GDS Base */
   5187	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5188	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5189				WRITE_DATA_DST_SEL(0)));
   5190	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
   5191	amdgpu_ring_write(ring, 0);
   5192	amdgpu_ring_write(ring, gds_base);
   5193
   5194	/* GDS Size */
   5195	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5196	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5197				WRITE_DATA_DST_SEL(0)));
   5198	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
   5199	amdgpu_ring_write(ring, 0);
   5200	amdgpu_ring_write(ring, gds_size);
   5201
   5202	/* GWS */
   5203	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5204	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5205				WRITE_DATA_DST_SEL(0)));
   5206	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
   5207	amdgpu_ring_write(ring, 0);
   5208	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
   5209
   5210	/* OA */
   5211	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5212	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   5213				WRITE_DATA_DST_SEL(0)));
   5214	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
   5215	amdgpu_ring_write(ring, 0);
   5216	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
   5217}
   5218
   5219static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
   5220{
   5221	WREG32(mmSQ_IND_INDEX,
   5222		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   5223		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
   5224		(address << SQ_IND_INDEX__INDEX__SHIFT) |
   5225		(SQ_IND_INDEX__FORCE_READ_MASK));
   5226	return RREG32(mmSQ_IND_DATA);
   5227}
   5228
   5229static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
   5230			   uint32_t wave, uint32_t thread,
   5231			   uint32_t regno, uint32_t num, uint32_t *out)
   5232{
   5233	WREG32(mmSQ_IND_INDEX,
   5234		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   5235		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
   5236		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
   5237		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
   5238		(SQ_IND_INDEX__FORCE_READ_MASK) |
   5239		(SQ_IND_INDEX__AUTO_INCR_MASK));
   5240	while (num--)
   5241		*(out++) = RREG32(mmSQ_IND_DATA);
   5242}
   5243
   5244static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
   5245{
   5246	/* type 0 wave data */
   5247	dst[(*no_fields)++] = 0;
   5248	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
   5249	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
   5250	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
   5251	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
   5252	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
   5253	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
   5254	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
   5255	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
   5256	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
   5257	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
   5258	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
   5259	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
   5260	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
   5261	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
   5262	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
   5263	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
   5264	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
   5265	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
   5266	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
   5267}
   5268
   5269static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
   5270				     uint32_t wave, uint32_t start,
   5271				     uint32_t size, uint32_t *dst)
   5272{
   5273	wave_read_regs(
   5274		adev, simd, wave, 0,
   5275		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
   5276}
   5277
   5278
   5279static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
   5280	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
   5281	.select_se_sh = &gfx_v8_0_select_se_sh,
   5282	.read_wave_data = &gfx_v8_0_read_wave_data,
   5283	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
   5284	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
   5285};
   5286
   5287static int gfx_v8_0_early_init(void *handle)
   5288{
   5289	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5290
   5291	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
   5292	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
   5293					  AMDGPU_MAX_COMPUTE_RINGS);
   5294	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
   5295	gfx_v8_0_set_ring_funcs(adev);
   5296	gfx_v8_0_set_irq_funcs(adev);
   5297	gfx_v8_0_set_gds_init(adev);
   5298	gfx_v8_0_set_rlc_funcs(adev);
   5299
   5300	return 0;
   5301}
   5302
   5303static int gfx_v8_0_late_init(void *handle)
   5304{
   5305	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5306	int r;
   5307
   5308	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
   5309	if (r)
   5310		return r;
   5311
   5312	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
   5313	if (r)
   5314		return r;
   5315
   5316	/* requires IBs so do in late init after IB pool is initialized */
   5317	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
   5318	if (r)
   5319		return r;
   5320
   5321	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
   5322	if (r) {
   5323		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
   5324		return r;
   5325	}
   5326
   5327	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
   5328	if (r) {
   5329		DRM_ERROR(
   5330			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
   5331			r);
   5332		return r;
   5333	}
   5334
   5335	return 0;
   5336}
   5337
   5338static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
   5339						       bool enable)
   5340{
   5341	if ((adev->asic_type == CHIP_POLARIS11) ||
   5342	    (adev->asic_type == CHIP_POLARIS12) ||
   5343	    (adev->asic_type == CHIP_VEGAM))
   5344		/* Send msg to SMU via Powerplay */
   5345		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
   5346
   5347	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
   5348}
   5349
   5350static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
   5351							bool enable)
   5352{
   5353	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
   5354}
   5355
   5356static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
   5357		bool enable)
   5358{
   5359	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
   5360}
   5361
   5362static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
   5363					  bool enable)
   5364{
   5365	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
   5366}
   5367
   5368static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
   5369						bool enable)
   5370{
   5371	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
   5372
   5373	/* Read any GFX register to wake up GFX. */
   5374	if (!enable)
   5375		RREG32(mmDB_RENDER_CONTROL);
   5376}
   5377
   5378static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
   5379					  bool enable)
   5380{
   5381	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
   5382		cz_enable_gfx_cg_power_gating(adev, true);
   5383		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
   5384			cz_enable_gfx_pipeline_power_gating(adev, true);
   5385	} else {
   5386		cz_enable_gfx_cg_power_gating(adev, false);
   5387		cz_enable_gfx_pipeline_power_gating(adev, false);
   5388	}
   5389}
   5390
   5391static int gfx_v8_0_set_powergating_state(void *handle,
   5392					  enum amd_powergating_state state)
   5393{
   5394	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5395	bool enable = (state == AMD_PG_STATE_GATE);
   5396
   5397	if (amdgpu_sriov_vf(adev))
   5398		return 0;
   5399
   5400	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
   5401				AMD_PG_SUPPORT_RLC_SMU_HS |
   5402				AMD_PG_SUPPORT_CP |
   5403				AMD_PG_SUPPORT_GFX_DMG))
   5404		amdgpu_gfx_rlc_enter_safe_mode(adev);
   5405	switch (adev->asic_type) {
   5406	case CHIP_CARRIZO:
   5407	case CHIP_STONEY:
   5408
   5409		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
   5410			cz_enable_sck_slow_down_on_power_up(adev, true);
   5411			cz_enable_sck_slow_down_on_power_down(adev, true);
   5412		} else {
   5413			cz_enable_sck_slow_down_on_power_up(adev, false);
   5414			cz_enable_sck_slow_down_on_power_down(adev, false);
   5415		}
   5416		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
   5417			cz_enable_cp_power_gating(adev, true);
   5418		else
   5419			cz_enable_cp_power_gating(adev, false);
   5420
   5421		cz_update_gfx_cg_power_gating(adev, enable);
   5422
   5423		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
   5424			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
   5425		else
   5426			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
   5427
   5428		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
   5429			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
   5430		else
   5431			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
   5432		break;
   5433	case CHIP_POLARIS11:
   5434	case CHIP_POLARIS12:
   5435	case CHIP_VEGAM:
   5436		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
   5437			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
   5438		else
   5439			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
   5440
   5441		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
   5442			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
   5443		else
   5444			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
   5445
   5446		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
   5447			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
   5448		else
   5449			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
   5450		break;
   5451	default:
   5452		break;
   5453	}
   5454	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
   5455				AMD_PG_SUPPORT_RLC_SMU_HS |
   5456				AMD_PG_SUPPORT_CP |
   5457				AMD_PG_SUPPORT_GFX_DMG))
   5458		amdgpu_gfx_rlc_exit_safe_mode(adev);
   5459	return 0;
   5460}
   5461
   5462static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
   5463{
   5464	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   5465	int data;
   5466
   5467	if (amdgpu_sriov_vf(adev))
   5468		*flags = 0;
   5469
   5470	/* AMD_CG_SUPPORT_GFX_MGCG */
   5471	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   5472	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
   5473		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
   5474
   5475	/* AMD_CG_SUPPORT_GFX_CGLG */
   5476	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
   5477	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
   5478		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
   5479
   5480	/* AMD_CG_SUPPORT_GFX_CGLS */
   5481	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
   5482		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
   5483
   5484	/* AMD_CG_SUPPORT_GFX_CGTS */
   5485	data = RREG32(mmCGTS_SM_CTRL_REG);
   5486	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
   5487		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
   5488
   5489	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
   5490	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
   5491		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
   5492
   5493	/* AMD_CG_SUPPORT_GFX_RLC_LS */
   5494	data = RREG32(mmRLC_MEM_SLP_CNTL);
   5495	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
   5496		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
   5497
   5498	/* AMD_CG_SUPPORT_GFX_CP_LS */
   5499	data = RREG32(mmCP_MEM_SLP_CNTL);
   5500	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
   5501		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
   5502}
   5503
   5504static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
   5505				     uint32_t reg_addr, uint32_t cmd)
   5506{
   5507	uint32_t data;
   5508
   5509	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   5510
   5511	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
   5512	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
   5513
   5514	data = RREG32(mmRLC_SERDES_WR_CTRL);
   5515	if (adev->asic_type == CHIP_STONEY)
   5516		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
   5517			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
   5518			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
   5519			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
   5520			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
   5521			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
   5522			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
   5523			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
   5524			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
   5525	else
   5526		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
   5527			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
   5528			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
   5529			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
   5530			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
   5531			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
   5532			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
   5533			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
   5534			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
   5535			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
   5536			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
   5537	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
   5538		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
   5539		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
   5540		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
   5541
   5542	WREG32(mmRLC_SERDES_WR_CTRL, data);
   5543}
   5544
   5545#define MSG_ENTER_RLC_SAFE_MODE     1
   5546#define MSG_EXIT_RLC_SAFE_MODE      0
   5547#define RLC_GPR_REG2__REQ_MASK 0x00000001
   5548#define RLC_GPR_REG2__REQ__SHIFT 0
   5549#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
   5550#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
   5551
   5552static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
   5553{
   5554	uint32_t rlc_setting;
   5555
   5556	rlc_setting = RREG32(mmRLC_CNTL);
   5557	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
   5558		return false;
   5559
   5560	return true;
   5561}
   5562
   5563static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
   5564{
   5565	uint32_t data;
   5566	unsigned i;
   5567	data = RREG32(mmRLC_CNTL);
   5568	data |= RLC_SAFE_MODE__CMD_MASK;
   5569	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
   5570	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
   5571	WREG32(mmRLC_SAFE_MODE, data);
   5572
   5573	/* wait for RLC_SAFE_MODE */
   5574	for (i = 0; i < adev->usec_timeout; i++) {
   5575		if ((RREG32(mmRLC_GPM_STAT) &
   5576		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
   5577		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
   5578		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
   5579		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
   5580			break;
   5581		udelay(1);
   5582	}
   5583	for (i = 0; i < adev->usec_timeout; i++) {
   5584		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
   5585			break;
   5586		udelay(1);
   5587	}
   5588}
   5589
   5590static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
   5591{
   5592	uint32_t data;
   5593	unsigned i;
   5594
   5595	data = RREG32(mmRLC_CNTL);
   5596	data |= RLC_SAFE_MODE__CMD_MASK;
   5597	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
   5598	WREG32(mmRLC_SAFE_MODE, data);
   5599
   5600	for (i = 0; i < adev->usec_timeout; i++) {
   5601		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
   5602			break;
   5603		udelay(1);
   5604	}
   5605}
   5606
   5607static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
   5608{
   5609	u32 data;
   5610
   5611	amdgpu_gfx_off_ctrl(adev, false);
   5612
   5613	if (amdgpu_sriov_is_pp_one_vf(adev))
   5614		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
   5615	else
   5616		data = RREG32(mmRLC_SPM_VMID);
   5617
   5618	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
   5619	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
   5620
   5621	if (amdgpu_sriov_is_pp_one_vf(adev))
   5622		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
   5623	else
   5624		WREG32(mmRLC_SPM_VMID, data);
   5625
   5626	amdgpu_gfx_off_ctrl(adev, true);
   5627}
   5628
   5629static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
   5630	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
   5631	.set_safe_mode = gfx_v8_0_set_safe_mode,
   5632	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
   5633	.init = gfx_v8_0_rlc_init,
   5634	.get_csb_size = gfx_v8_0_get_csb_size,
   5635	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
   5636	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
   5637	.resume = gfx_v8_0_rlc_resume,
   5638	.stop = gfx_v8_0_rlc_stop,
   5639	.reset = gfx_v8_0_rlc_reset,
   5640	.start = gfx_v8_0_rlc_start,
   5641	.update_spm_vmid = gfx_v8_0_update_spm_vmid
   5642};
   5643
   5644static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
   5645						      bool enable)
   5646{
   5647	uint32_t temp, data;
   5648
   5649	amdgpu_gfx_rlc_enter_safe_mode(adev);
   5650
   5651	/* It is disabled by HW by default */
   5652	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
   5653		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
   5654			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
   5655				/* 1 - RLC memory Light sleep */
   5656				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
   5657
   5658			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
   5659				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
   5660		}
   5661
   5662		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
   5663		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   5664		if (adev->flags & AMD_IS_APU)
   5665			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
   5666				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
   5667				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
   5668		else
   5669			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
   5670				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
   5671				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
   5672				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
   5673
   5674		if (temp != data)
   5675			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
   5676
   5677		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5678		gfx_v8_0_wait_for_rlc_serdes(adev);
   5679
   5680		/* 5 - clear mgcg override */
   5681		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
   5682
   5683		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
   5684			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
   5685			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
   5686			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
   5687			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
   5688			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
   5689			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
   5690			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
   5691			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
   5692				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
   5693			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
   5694			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
   5695			if (temp != data)
   5696				WREG32(mmCGTS_SM_CTRL_REG, data);
   5697		}
   5698		udelay(50);
   5699
   5700		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5701		gfx_v8_0_wait_for_rlc_serdes(adev);
   5702	} else {
   5703		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
   5704		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   5705		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
   5706				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
   5707				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
   5708				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
   5709		if (temp != data)
   5710			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
   5711
   5712		/* 2 - disable MGLS in RLC */
   5713		data = RREG32(mmRLC_MEM_SLP_CNTL);
   5714		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
   5715			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
   5716			WREG32(mmRLC_MEM_SLP_CNTL, data);
   5717		}
   5718
   5719		/* 3 - disable MGLS in CP */
   5720		data = RREG32(mmCP_MEM_SLP_CNTL);
   5721		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
   5722			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
   5723			WREG32(mmCP_MEM_SLP_CNTL, data);
   5724		}
   5725
   5726		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
   5727		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
   5728		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
   5729				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
   5730		if (temp != data)
   5731			WREG32(mmCGTS_SM_CTRL_REG, data);
   5732
   5733		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5734		gfx_v8_0_wait_for_rlc_serdes(adev);
   5735
   5736		/* 6 - set mgcg override */
   5737		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
   5738
   5739		udelay(50);
   5740
   5741		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5742		gfx_v8_0_wait_for_rlc_serdes(adev);
   5743	}
   5744
   5745	amdgpu_gfx_rlc_exit_safe_mode(adev);
   5746}
   5747
   5748static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
   5749						      bool enable)
   5750{
   5751	uint32_t temp, temp1, data, data1;
   5752
   5753	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
   5754
   5755	amdgpu_gfx_rlc_enter_safe_mode(adev);
   5756
   5757	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
   5758		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   5759		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
   5760		if (temp1 != data1)
   5761			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
   5762
   5763		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5764		gfx_v8_0_wait_for_rlc_serdes(adev);
   5765
   5766		/* 2 - clear cgcg override */
   5767		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
   5768
   5769		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5770		gfx_v8_0_wait_for_rlc_serdes(adev);
   5771
   5772		/* 3 - write cmd to set CGLS */
   5773		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
   5774
   5775		/* 4 - enable cgcg */
   5776		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
   5777
   5778		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
   5779			/* enable cgls*/
   5780			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
   5781
   5782			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   5783			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
   5784
   5785			if (temp1 != data1)
   5786				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
   5787		} else {
   5788			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
   5789		}
   5790
   5791		if (temp != data)
   5792			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
   5793
   5794		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
   5795		 * Cmp_busy/GFX_Idle interrupts
   5796		 */
   5797		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
   5798	} else {
   5799		/* disable cntx_empty_int_enable & GFX Idle interrupt */
   5800		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
   5801
   5802		/* TEST CGCG */
   5803		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   5804		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
   5805				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
   5806		if (temp1 != data1)
   5807			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
   5808
   5809		/* read gfx register to wake up cgcg */
   5810		RREG32(mmCB_CGTT_SCLK_CTRL);
   5811		RREG32(mmCB_CGTT_SCLK_CTRL);
   5812		RREG32(mmCB_CGTT_SCLK_CTRL);
   5813		RREG32(mmCB_CGTT_SCLK_CTRL);
   5814
   5815		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5816		gfx_v8_0_wait_for_rlc_serdes(adev);
   5817
   5818		/* write cmd to Set CGCG Override */
   5819		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
   5820
   5821		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
   5822		gfx_v8_0_wait_for_rlc_serdes(adev);
   5823
   5824		/* write cmd to Clear CGLS */
   5825		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
   5826
   5827		/* disable cgcg, cgls should be disabled too. */
   5828		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
   5829			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
   5830		if (temp != data)
   5831			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
   5832		/* enable interrupts again for PG */
   5833		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
   5834	}
   5835
   5836	gfx_v8_0_wait_for_rlc_serdes(adev);
   5837
   5838	amdgpu_gfx_rlc_exit_safe_mode(adev);
   5839}
   5840static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
   5841					    bool enable)
   5842{
   5843	if (enable) {
   5844		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
   5845		 * ===  MGCG + MGLS + TS(CG/LS) ===
   5846		 */
   5847		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
   5848		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
   5849	} else {
   5850		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
   5851		 * ===  CGCG + CGLS ===
   5852		 */
   5853		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
   5854		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
   5855	}
   5856	return 0;
   5857}
   5858
   5859static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
   5860					  enum amd_clockgating_state state)
   5861{
   5862	uint32_t msg_id, pp_state = 0;
   5863	uint32_t pp_support_state = 0;
   5864
   5865	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
   5866		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
   5867			pp_support_state = PP_STATE_SUPPORT_LS;
   5868			pp_state = PP_STATE_LS;
   5869		}
   5870		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
   5871			pp_support_state |= PP_STATE_SUPPORT_CG;
   5872			pp_state |= PP_STATE_CG;
   5873		}
   5874		if (state == AMD_CG_STATE_UNGATE)
   5875			pp_state = 0;
   5876
   5877		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5878				PP_BLOCK_GFX_CG,
   5879				pp_support_state,
   5880				pp_state);
   5881		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   5882	}
   5883
   5884	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
   5885		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
   5886			pp_support_state = PP_STATE_SUPPORT_LS;
   5887			pp_state = PP_STATE_LS;
   5888		}
   5889
   5890		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
   5891			pp_support_state |= PP_STATE_SUPPORT_CG;
   5892			pp_state |= PP_STATE_CG;
   5893		}
   5894
   5895		if (state == AMD_CG_STATE_UNGATE)
   5896			pp_state = 0;
   5897
   5898		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5899				PP_BLOCK_GFX_MG,
   5900				pp_support_state,
   5901				pp_state);
   5902		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   5903	}
   5904
   5905	return 0;
   5906}
   5907
   5908static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
   5909					  enum amd_clockgating_state state)
   5910{
   5911
   5912	uint32_t msg_id, pp_state = 0;
   5913	uint32_t pp_support_state = 0;
   5914
   5915	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
   5916		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
   5917			pp_support_state = PP_STATE_SUPPORT_LS;
   5918			pp_state = PP_STATE_LS;
   5919		}
   5920		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
   5921			pp_support_state |= PP_STATE_SUPPORT_CG;
   5922			pp_state |= PP_STATE_CG;
   5923		}
   5924		if (state == AMD_CG_STATE_UNGATE)
   5925			pp_state = 0;
   5926
   5927		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5928				PP_BLOCK_GFX_CG,
   5929				pp_support_state,
   5930				pp_state);
   5931		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   5932	}
   5933
   5934	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
   5935		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
   5936			pp_support_state = PP_STATE_SUPPORT_LS;
   5937			pp_state = PP_STATE_LS;
   5938		}
   5939		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
   5940			pp_support_state |= PP_STATE_SUPPORT_CG;
   5941			pp_state |= PP_STATE_CG;
   5942		}
   5943		if (state == AMD_CG_STATE_UNGATE)
   5944			pp_state = 0;
   5945
   5946		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5947				PP_BLOCK_GFX_3D,
   5948				pp_support_state,
   5949				pp_state);
   5950		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   5951	}
   5952
   5953	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
   5954		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
   5955			pp_support_state = PP_STATE_SUPPORT_LS;
   5956			pp_state = PP_STATE_LS;
   5957		}
   5958
   5959		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
   5960			pp_support_state |= PP_STATE_SUPPORT_CG;
   5961			pp_state |= PP_STATE_CG;
   5962		}
   5963
   5964		if (state == AMD_CG_STATE_UNGATE)
   5965			pp_state = 0;
   5966
   5967		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5968				PP_BLOCK_GFX_MG,
   5969				pp_support_state,
   5970				pp_state);
   5971		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   5972	}
   5973
   5974	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
   5975		pp_support_state = PP_STATE_SUPPORT_LS;
   5976
   5977		if (state == AMD_CG_STATE_UNGATE)
   5978			pp_state = 0;
   5979		else
   5980			pp_state = PP_STATE_LS;
   5981
   5982		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5983				PP_BLOCK_GFX_RLC,
   5984				pp_support_state,
   5985				pp_state);
   5986		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   5987	}
   5988
   5989	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
   5990		pp_support_state = PP_STATE_SUPPORT_LS;
   5991
   5992		if (state == AMD_CG_STATE_UNGATE)
   5993			pp_state = 0;
   5994		else
   5995			pp_state = PP_STATE_LS;
   5996		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
   5997			PP_BLOCK_GFX_CP,
   5998			pp_support_state,
   5999			pp_state);
   6000		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
   6001	}
   6002
   6003	return 0;
   6004}
   6005
   6006static int gfx_v8_0_set_clockgating_state(void *handle,
   6007					  enum amd_clockgating_state state)
   6008{
   6009	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   6010
   6011	if (amdgpu_sriov_vf(adev))
   6012		return 0;
   6013
   6014	switch (adev->asic_type) {
   6015	case CHIP_FIJI:
   6016	case CHIP_CARRIZO:
   6017	case CHIP_STONEY:
   6018		gfx_v8_0_update_gfx_clock_gating(adev,
   6019						 state == AMD_CG_STATE_GATE);
   6020		break;
   6021	case CHIP_TONGA:
   6022		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
   6023		break;
   6024	case CHIP_POLARIS10:
   6025	case CHIP_POLARIS11:
   6026	case CHIP_POLARIS12:
   6027	case CHIP_VEGAM:
   6028		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
   6029		break;
   6030	default:
   6031		break;
   6032	}
   6033	return 0;
   6034}
   6035
   6036static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
   6037{
   6038	return *ring->rptr_cpu_addr;
   6039}
   6040
   6041static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
   6042{
   6043	struct amdgpu_device *adev = ring->adev;
   6044
   6045	if (ring->use_doorbell)
   6046		/* XXX check if swapping is necessary on BE */
   6047		return *ring->wptr_cpu_addr;
   6048	else
   6049		return RREG32(mmCP_RB0_WPTR);
   6050}
   6051
   6052static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
   6053{
   6054	struct amdgpu_device *adev = ring->adev;
   6055
   6056	if (ring->use_doorbell) {
   6057		/* XXX check if swapping is necessary on BE */
   6058		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
   6059		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
   6060	} else {
   6061		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
   6062		(void)RREG32(mmCP_RB0_WPTR);
   6063	}
   6064}
   6065
   6066static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
   6067{
   6068	u32 ref_and_mask, reg_mem_engine;
   6069
   6070	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
   6071	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
   6072		switch (ring->me) {
   6073		case 1:
   6074			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
   6075			break;
   6076		case 2:
   6077			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
   6078			break;
   6079		default:
   6080			return;
   6081		}
   6082		reg_mem_engine = 0;
   6083	} else {
   6084		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
   6085		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
   6086	}
   6087
   6088	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   6089	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
   6090				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
   6091				 reg_mem_engine));
   6092	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
   6093	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
   6094	amdgpu_ring_write(ring, ref_and_mask);
   6095	amdgpu_ring_write(ring, ref_and_mask);
   6096	amdgpu_ring_write(ring, 0x20); /* poll interval */
   6097}
   6098
   6099static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
   6100{
   6101	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
   6102	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
   6103		EVENT_INDEX(4));
   6104
   6105	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
   6106	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
   6107		EVENT_INDEX(0));
   6108}
   6109
   6110static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
   6111					struct amdgpu_job *job,
   6112					struct amdgpu_ib *ib,
   6113					uint32_t flags)
   6114{
   6115	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   6116	u32 header, control = 0;
   6117
   6118	if (ib->flags & AMDGPU_IB_FLAG_CE)
   6119		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
   6120	else
   6121		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
   6122
   6123	control |= ib->length_dw | (vmid << 24);
   6124
   6125	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
   6126		control |= INDIRECT_BUFFER_PRE_ENB(1);
   6127
   6128		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
   6129			gfx_v8_0_ring_emit_de_meta(ring);
   6130	}
   6131
   6132	amdgpu_ring_write(ring, header);
   6133	amdgpu_ring_write(ring,
   6134#ifdef __BIG_ENDIAN
   6135			  (2 << 0) |
   6136#endif
   6137			  (ib->gpu_addr & 0xFFFFFFFC));
   6138	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
   6139	amdgpu_ring_write(ring, control);
   6140}
   6141
   6142static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
   6143					  struct amdgpu_job *job,
   6144					  struct amdgpu_ib *ib,
   6145					  uint32_t flags)
   6146{
   6147	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   6148	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
   6149
   6150	/* Currently, there is a high possibility to get wave ID mismatch
   6151	 * between ME and GDS, leading to a hw deadlock, because ME generates
   6152	 * different wave IDs than the GDS expects. This situation happens
   6153	 * randomly when at least 5 compute pipes use GDS ordered append.
   6154	 * The wave IDs generated by ME are also wrong after suspend/resume.
   6155	 * Those are probably bugs somewhere else in the kernel driver.
   6156	 *
   6157	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
   6158	 * GDS to 0 for this ring (me/pipe).
   6159	 */
   6160	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
   6161		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
   6162		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
   6163		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
   6164	}
   6165
   6166	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
   6167	amdgpu_ring_write(ring,
   6168#ifdef __BIG_ENDIAN
   6169				(2 << 0) |
   6170#endif
   6171				(ib->gpu_addr & 0xFFFFFFFC));
   6172	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
   6173	amdgpu_ring_write(ring, control);
   6174}
   6175
   6176static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
   6177					 u64 seq, unsigned flags)
   6178{
   6179	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
   6180	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   6181
   6182	/* Workaround for cache flush problems. First send a dummy EOP
   6183	 * event down the pipe with seq one below.
   6184	 */
   6185	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
   6186	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   6187				 EOP_TC_ACTION_EN |
   6188				 EOP_TC_WB_ACTION_EN |
   6189				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   6190				 EVENT_INDEX(5)));
   6191	amdgpu_ring_write(ring, addr & 0xfffffffc);
   6192	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
   6193				DATA_SEL(1) | INT_SEL(0));
   6194	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
   6195	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
   6196
   6197	/* Then send the real EOP event down the pipe:
   6198	 * EVENT_WRITE_EOP - flush caches, send int */
   6199	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
   6200	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   6201				 EOP_TC_ACTION_EN |
   6202				 EOP_TC_WB_ACTION_EN |
   6203				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   6204				 EVENT_INDEX(5)));
   6205	amdgpu_ring_write(ring, addr & 0xfffffffc);
   6206	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
   6207			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
   6208	amdgpu_ring_write(ring, lower_32_bits(seq));
   6209	amdgpu_ring_write(ring, upper_32_bits(seq));
   6210
   6211}
   6212
   6213static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
   6214{
   6215	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   6216	uint32_t seq = ring->fence_drv.sync_seq;
   6217	uint64_t addr = ring->fence_drv.gpu_addr;
   6218
   6219	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   6220	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
   6221				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
   6222				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
   6223	amdgpu_ring_write(ring, addr & 0xfffffffc);
   6224	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
   6225	amdgpu_ring_write(ring, seq);
   6226	amdgpu_ring_write(ring, 0xffffffff);
   6227	amdgpu_ring_write(ring, 4); /* poll interval */
   6228}
   6229
   6230static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
   6231					unsigned vmid, uint64_t pd_addr)
   6232{
   6233	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   6234
   6235	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
   6236
   6237	/* wait for the invalidate to complete */
   6238	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   6239	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
   6240				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
   6241				 WAIT_REG_MEM_ENGINE(0))); /* me */
   6242	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
   6243	amdgpu_ring_write(ring, 0);
   6244	amdgpu_ring_write(ring, 0); /* ref */
   6245	amdgpu_ring_write(ring, 0); /* mask */
   6246	amdgpu_ring_write(ring, 0x20); /* poll interval */
   6247
   6248	/* compute doesn't have PFP */
   6249	if (usepfp) {
   6250		/* sync PFP to ME, otherwise we might get invalid PFP reads */
   6251		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   6252		amdgpu_ring_write(ring, 0x0);
   6253	}
   6254}
   6255
   6256static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
   6257{
   6258	return *ring->wptr_cpu_addr;
   6259}
   6260
   6261static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
   6262{
   6263	struct amdgpu_device *adev = ring->adev;
   6264
   6265	/* XXX check if swapping is necessary on BE */
   6266	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
   6267	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
   6268}
   6269
   6270static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
   6271					     u64 addr, u64 seq,
   6272					     unsigned flags)
   6273{
   6274	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
   6275	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   6276
   6277	/* RELEASE_MEM - flush caches, send int */
   6278	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
   6279	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   6280				 EOP_TC_ACTION_EN |
   6281				 EOP_TC_WB_ACTION_EN |
   6282				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   6283				 EVENT_INDEX(5)));
   6284	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
   6285	amdgpu_ring_write(ring, addr & 0xfffffffc);
   6286	amdgpu_ring_write(ring, upper_32_bits(addr));
   6287	amdgpu_ring_write(ring, lower_32_bits(seq));
   6288	amdgpu_ring_write(ring, upper_32_bits(seq));
   6289}
   6290
   6291static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
   6292					 u64 seq, unsigned int flags)
   6293{
   6294	/* we only allocate 32bit for each seq wb address */
   6295	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
   6296
   6297	/* write fence seq to the "addr" */
   6298	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6299	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   6300				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
   6301	amdgpu_ring_write(ring, lower_32_bits(addr));
   6302	amdgpu_ring_write(ring, upper_32_bits(addr));
   6303	amdgpu_ring_write(ring, lower_32_bits(seq));
   6304
   6305	if (flags & AMDGPU_FENCE_FLAG_INT) {
   6306		/* set register to trigger INT */
   6307		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6308		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   6309					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
   6310		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
   6311		amdgpu_ring_write(ring, 0);
   6312		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
   6313	}
   6314}
   6315
   6316static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
   6317{
   6318	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   6319	amdgpu_ring_write(ring, 0);
   6320}
   6321
   6322static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
   6323{
   6324	uint32_t dw2 = 0;
   6325
   6326	if (amdgpu_sriov_vf(ring->adev))
   6327		gfx_v8_0_ring_emit_ce_meta(ring);
   6328
   6329	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
   6330	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
   6331		gfx_v8_0_ring_emit_vgt_flush(ring);
   6332		/* set load_global_config & load_global_uconfig */
   6333		dw2 |= 0x8001;
   6334		/* set load_cs_sh_regs */
   6335		dw2 |= 0x01000000;
   6336		/* set load_per_context_state & load_gfx_sh_regs for GFX */
   6337		dw2 |= 0x10002;
   6338
   6339		/* set load_ce_ram if preamble presented */
   6340		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
   6341			dw2 |= 0x10000000;
   6342	} else {
   6343		/* still load_ce_ram if this is the first time preamble presented
   6344		 * although there is no context switch happens.
   6345		 */
   6346		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
   6347			dw2 |= 0x10000000;
   6348	}
   6349
   6350	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   6351	amdgpu_ring_write(ring, dw2);
   6352	amdgpu_ring_write(ring, 0);
   6353}
   6354
   6355static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
   6356{
   6357	unsigned ret;
   6358
   6359	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
   6360	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
   6361	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
   6362	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
   6363	ret = ring->wptr & ring->buf_mask;
   6364	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
   6365	return ret;
   6366}
   6367
   6368static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
   6369{
   6370	unsigned cur;
   6371
   6372	BUG_ON(offset > ring->buf_mask);
   6373	BUG_ON(ring->ring[offset] != 0x55aa55aa);
   6374
   6375	cur = (ring->wptr & ring->buf_mask) - 1;
   6376	if (likely(cur > offset))
   6377		ring->ring[offset] = cur - offset;
   6378	else
   6379		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
   6380}
   6381
   6382static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
   6383				    uint32_t reg_val_offs)
   6384{
   6385	struct amdgpu_device *adev = ring->adev;
   6386
   6387	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
   6388	amdgpu_ring_write(ring, 0 |	/* src: register*/
   6389				(5 << 8) |	/* dst: memory */
   6390				(1 << 20));	/* write confirm */
   6391	amdgpu_ring_write(ring, reg);
   6392	amdgpu_ring_write(ring, 0);
   6393	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
   6394				reg_val_offs * 4));
   6395	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
   6396				reg_val_offs * 4));
   6397}
   6398
   6399static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
   6400				  uint32_t val)
   6401{
   6402	uint32_t cmd;
   6403
   6404	switch (ring->funcs->type) {
   6405	case AMDGPU_RING_TYPE_GFX:
   6406		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
   6407		break;
   6408	case AMDGPU_RING_TYPE_KIQ:
   6409		cmd = 1 << 16; /* no inc addr */
   6410		break;
   6411	default:
   6412		cmd = WR_CONFIRM;
   6413		break;
   6414	}
   6415
   6416	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   6417	amdgpu_ring_write(ring, cmd);
   6418	amdgpu_ring_write(ring, reg);
   6419	amdgpu_ring_write(ring, 0);
   6420	amdgpu_ring_write(ring, val);
   6421}
   6422
   6423static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
   6424{
   6425	struct amdgpu_device *adev = ring->adev;
   6426	uint32_t value = 0;
   6427
   6428	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
   6429	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
   6430	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
   6431	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
   6432	WREG32(mmSQ_CMD, value);
   6433}
   6434
   6435static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
   6436						 enum amdgpu_interrupt_state state)
   6437{
   6438	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
   6439		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
   6440}
   6441
   6442static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
   6443						     int me, int pipe,
   6444						     enum amdgpu_interrupt_state state)
   6445{
   6446	u32 mec_int_cntl, mec_int_cntl_reg;
   6447
   6448	/*
   6449	 * amdgpu controls only the first MEC. That's why this function only
   6450	 * handles the setting of interrupts for this specific MEC. All other
   6451	 * pipes' interrupts are set by amdkfd.
   6452	 */
   6453
   6454	if (me == 1) {
   6455		switch (pipe) {
   6456		case 0:
   6457			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
   6458			break;
   6459		case 1:
   6460			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
   6461			break;
   6462		case 2:
   6463			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
   6464			break;
   6465		case 3:
   6466			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
   6467			break;
   6468		default:
   6469			DRM_DEBUG("invalid pipe %d\n", pipe);
   6470			return;
   6471		}
   6472	} else {
   6473		DRM_DEBUG("invalid me %d\n", me);
   6474		return;
   6475	}
   6476
   6477	switch (state) {
   6478	case AMDGPU_IRQ_STATE_DISABLE:
   6479		mec_int_cntl = RREG32(mec_int_cntl_reg);
   6480		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
   6481		WREG32(mec_int_cntl_reg, mec_int_cntl);
   6482		break;
   6483	case AMDGPU_IRQ_STATE_ENABLE:
   6484		mec_int_cntl = RREG32(mec_int_cntl_reg);
   6485		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
   6486		WREG32(mec_int_cntl_reg, mec_int_cntl);
   6487		break;
   6488	default:
   6489		break;
   6490	}
   6491}
   6492
   6493static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
   6494					     struct amdgpu_irq_src *source,
   6495					     unsigned type,
   6496					     enum amdgpu_interrupt_state state)
   6497{
   6498	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
   6499		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
   6500
   6501	return 0;
   6502}
   6503
   6504static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
   6505					      struct amdgpu_irq_src *source,
   6506					      unsigned type,
   6507					      enum amdgpu_interrupt_state state)
   6508{
   6509	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
   6510		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
   6511
   6512	return 0;
   6513}
   6514
   6515static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
   6516					    struct amdgpu_irq_src *src,
   6517					    unsigned type,
   6518					    enum amdgpu_interrupt_state state)
   6519{
   6520	switch (type) {
   6521	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
   6522		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
   6523		break;
   6524	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
   6525		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
   6526		break;
   6527	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
   6528		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
   6529		break;
   6530	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
   6531		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
   6532		break;
   6533	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
   6534		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
   6535		break;
   6536	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
   6537		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
   6538		break;
   6539	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
   6540		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
   6541		break;
   6542	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
   6543		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
   6544		break;
   6545	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
   6546		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
   6547		break;
   6548	default:
   6549		break;
   6550	}
   6551	return 0;
   6552}
   6553
   6554static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
   6555					 struct amdgpu_irq_src *source,
   6556					 unsigned int type,
   6557					 enum amdgpu_interrupt_state state)
   6558{
   6559	int enable_flag;
   6560
   6561	switch (state) {
   6562	case AMDGPU_IRQ_STATE_DISABLE:
   6563		enable_flag = 0;
   6564		break;
   6565
   6566	case AMDGPU_IRQ_STATE_ENABLE:
   6567		enable_flag = 1;
   6568		break;
   6569
   6570	default:
   6571		return -EINVAL;
   6572	}
   6573
   6574	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
   6575	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
   6576	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
   6577	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
   6578	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
   6579	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6580		     enable_flag);
   6581	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6582		     enable_flag);
   6583	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6584		     enable_flag);
   6585	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6586		     enable_flag);
   6587	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6588		     enable_flag);
   6589	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6590		     enable_flag);
   6591	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6592		     enable_flag);
   6593	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
   6594		     enable_flag);
   6595
   6596	return 0;
   6597}
   6598
   6599static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
   6600				     struct amdgpu_irq_src *source,
   6601				     unsigned int type,
   6602				     enum amdgpu_interrupt_state state)
   6603{
   6604	int enable_flag;
   6605
   6606	switch (state) {
   6607	case AMDGPU_IRQ_STATE_DISABLE:
   6608		enable_flag = 1;
   6609		break;
   6610
   6611	case AMDGPU_IRQ_STATE_ENABLE:
   6612		enable_flag = 0;
   6613		break;
   6614
   6615	default:
   6616		return -EINVAL;
   6617	}
   6618
   6619	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
   6620		     enable_flag);
   6621
   6622	return 0;
   6623}
   6624
   6625static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
   6626			    struct amdgpu_irq_src *source,
   6627			    struct amdgpu_iv_entry *entry)
   6628{
   6629	int i;
   6630	u8 me_id, pipe_id, queue_id;
   6631	struct amdgpu_ring *ring;
   6632
   6633	DRM_DEBUG("IH: CP EOP\n");
   6634	me_id = (entry->ring_id & 0x0c) >> 2;
   6635	pipe_id = (entry->ring_id & 0x03) >> 0;
   6636	queue_id = (entry->ring_id & 0x70) >> 4;
   6637
   6638	switch (me_id) {
   6639	case 0:
   6640		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
   6641		break;
   6642	case 1:
   6643	case 2:
   6644		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   6645			ring = &adev->gfx.compute_ring[i];
   6646			/* Per-queue interrupt is supported for MEC starting from VI.
   6647			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
   6648			  */
   6649			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
   6650				amdgpu_fence_process(ring);
   6651		}
   6652		break;
   6653	}
   6654	return 0;
   6655}
   6656
   6657static void gfx_v8_0_fault(struct amdgpu_device *adev,
   6658			   struct amdgpu_iv_entry *entry)
   6659{
   6660	u8 me_id, pipe_id, queue_id;
   6661	struct amdgpu_ring *ring;
   6662	int i;
   6663
   6664	me_id = (entry->ring_id & 0x0c) >> 2;
   6665	pipe_id = (entry->ring_id & 0x03) >> 0;
   6666	queue_id = (entry->ring_id & 0x70) >> 4;
   6667
   6668	switch (me_id) {
   6669	case 0:
   6670		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
   6671		break;
   6672	case 1:
   6673	case 2:
   6674		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   6675			ring = &adev->gfx.compute_ring[i];
   6676			if (ring->me == me_id && ring->pipe == pipe_id &&
   6677			    ring->queue == queue_id)
   6678				drm_sched_fault(&ring->sched);
   6679		}
   6680		break;
   6681	}
   6682}
   6683
   6684static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
   6685				 struct amdgpu_irq_src *source,
   6686				 struct amdgpu_iv_entry *entry)
   6687{
   6688	DRM_ERROR("Illegal register access in command stream\n");
   6689	gfx_v8_0_fault(adev, entry);
   6690	return 0;
   6691}
   6692
   6693static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
   6694				  struct amdgpu_irq_src *source,
   6695				  struct amdgpu_iv_entry *entry)
   6696{
   6697	DRM_ERROR("Illegal instruction in command stream\n");
   6698	gfx_v8_0_fault(adev, entry);
   6699	return 0;
   6700}
   6701
   6702static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
   6703				     struct amdgpu_irq_src *source,
   6704				     struct amdgpu_iv_entry *entry)
   6705{
   6706	DRM_ERROR("CP EDC/ECC error detected.");
   6707	return 0;
   6708}
   6709
   6710static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
   6711				  bool from_wq)
   6712{
   6713	u32 enc, se_id, sh_id, cu_id;
   6714	char type[20];
   6715	int sq_edc_source = -1;
   6716
   6717	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
   6718	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
   6719
   6720	switch (enc) {
   6721		case 0:
   6722			DRM_INFO("SQ general purpose intr detected:"
   6723					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
   6724					"host_cmd_overflow %d, cmd_timestamp %d,"
   6725					"reg_timestamp %d, thread_trace_buff_full %d,"
   6726					"wlt %d, thread_trace %d.\n",
   6727					se_id,
   6728					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
   6729					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
   6730					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
   6731					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
   6732					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
   6733					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
   6734					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
   6735					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
   6736					);
   6737			break;
   6738		case 1:
   6739		case 2:
   6740
   6741			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
   6742			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
   6743
   6744			/*
   6745			 * This function can be called either directly from ISR
   6746			 * or from BH in which case we can access SQ_EDC_INFO
   6747			 * instance
   6748			 */
   6749			if (from_wq) {
   6750				mutex_lock(&adev->grbm_idx_mutex);
   6751				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
   6752
   6753				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
   6754
   6755				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   6756				mutex_unlock(&adev->grbm_idx_mutex);
   6757			}
   6758
   6759			if (enc == 1)
   6760				sprintf(type, "instruction intr");
   6761			else
   6762				sprintf(type, "EDC/ECC error");
   6763
   6764			DRM_INFO(
   6765				"SQ %s detected: "
   6766					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
   6767					"trap %s, sq_ed_info.source %s.\n",
   6768					type, se_id, sh_id, cu_id,
   6769					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
   6770					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
   6771					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
   6772					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
   6773					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
   6774				);
   6775			break;
   6776		default:
   6777			DRM_ERROR("SQ invalid encoding type\n.");
   6778	}
   6779}
   6780
   6781static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
   6782{
   6783
   6784	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
   6785	struct sq_work *sq_work = container_of(work, struct sq_work, work);
   6786
   6787	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
   6788}
   6789
   6790static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
   6791			   struct amdgpu_irq_src *source,
   6792			   struct amdgpu_iv_entry *entry)
   6793{
   6794	unsigned ih_data = entry->src_data[0];
   6795
   6796	/*
   6797	 * Try to submit work so SQ_EDC_INFO can be accessed from
   6798	 * BH. If previous work submission hasn't finished yet
   6799	 * just print whatever info is possible directly from the ISR.
   6800	 */
   6801	if (work_pending(&adev->gfx.sq_work.work)) {
   6802		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
   6803	} else {
   6804		adev->gfx.sq_work.ih_data = ih_data;
   6805		schedule_work(&adev->gfx.sq_work.work);
   6806	}
   6807
   6808	return 0;
   6809}
   6810
   6811static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
   6812{
   6813	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
   6814	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
   6815			  PACKET3_TC_ACTION_ENA |
   6816			  PACKET3_SH_KCACHE_ACTION_ENA |
   6817			  PACKET3_SH_ICACHE_ACTION_ENA |
   6818			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
   6819	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
   6820	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
   6821	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
   6822}
   6823
   6824static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
   6825{
   6826	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
   6827	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
   6828			  PACKET3_TC_ACTION_ENA |
   6829			  PACKET3_SH_KCACHE_ACTION_ENA |
   6830			  PACKET3_SH_ICACHE_ACTION_ENA |
   6831			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
   6832	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
   6833	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
   6834	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
   6835	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
   6836	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
   6837}
   6838
   6839
   6840/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
   6841#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
   6842static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
   6843					uint32_t pipe, bool enable)
   6844{
   6845	uint32_t val;
   6846	uint32_t wcl_cs_reg;
   6847
   6848	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
   6849
   6850	switch (pipe) {
   6851	case 0:
   6852		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
   6853		break;
   6854	case 1:
   6855		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
   6856		break;
   6857	case 2:
   6858		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
   6859		break;
   6860	case 3:
   6861		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
   6862		break;
   6863	default:
   6864		DRM_DEBUG("invalid pipe %d\n", pipe);
   6865		return;
   6866	}
   6867
   6868	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
   6869
   6870}
   6871
   6872#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
   6873static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
   6874{
   6875	struct amdgpu_device *adev = ring->adev;
   6876	uint32_t val;
   6877	int i;
   6878
   6879	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
   6880	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
   6881	 * around 25% of gpu resources.
   6882	 */
   6883	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
   6884	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
   6885
   6886	/* Restrict waves for normal/low priority compute queues as well
   6887	 * to get best QoS for high priority compute jobs.
   6888	 *
   6889	 * amdgpu controls only 1st ME(0-3 CS pipes).
   6890	 */
   6891	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
   6892		if (i != ring->pipe)
   6893			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
   6894
   6895	}
   6896
   6897}
   6898
/*
 * struct amd_ip_funcs callback table for gfx_v8_0: wires up the init/fini,
 * suspend/resume, idle, soft-reset and clock/power-gating handlers defined
 * elsewhere in this file.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
   6919
/*
 * Ring callback table for the gfx ring (AMDGPU_RING_TYPE_GFX).
 *
 * emit_frame_size is the sum of the dword counts of the packets listed in
 * the per-term comments; emit_ib_size is the dword count written by
 * gfx_v8_0_ring_emit_ib_gfx().
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER; the first COND_EXEC jumps to the
		     * place just prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		5, /* SURFACE_SYNC */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};
   6966
   6967static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
       	/*
       	 * Callback table for rings of type AMDGPU_RING_TYPE_COMPUTE.
       	 * gfx_v8_0_set_ring_funcs() points every adev->gfx.compute_ring[]
       	 * entry at this table.
       	 */
   6968	.type = AMDGPU_RING_TYPE_COMPUTE,
   6969	.align_mask = 0xff,
   6970	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   6971	.support_64bit_ptrs = false,
   6972	.get_rptr = gfx_v8_0_ring_get_rptr,
   6973	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
   6974	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
       	/*
       	 * One term per emit helper; the trailing comment on each line names
       	 * the helper that term accounts for.
       	 */
   6975	.emit_frame_size =
   6976		20 + /* gfx_v8_0_ring_emit_gds_switch */
   6977		7 + /* gfx_v8_0_ring_emit_hdp_flush */
   6978		5 + /* hdp_invalidate */
   6979		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
   6980		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
   6981		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
   6982		7 + /* gfx_v8_0_emit_mem_sync_compute */
   6983		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
   6984		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
   6985	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
   6986	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
   6987	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
   6988	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
   6989	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
   6990	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
   6991	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
   6992	.test_ring = gfx_v8_0_ring_test_ring,
   6993	.test_ib = gfx_v8_0_ring_test_ib,
   6994	.insert_nop = amdgpu_ring_insert_nop,
   6995	.pad_ib = amdgpu_ring_generic_pad_ib,
   6996	.emit_wreg = gfx_v8_0_ring_emit_wreg,
   6997	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
       	/* emit_wave_limit is set only in this table; the GFX and KIQ tables leave it unset. */
   6998	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
   6999};
   7000
   7001static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
       	/*
       	 * Callback table for the ring of type AMDGPU_RING_TYPE_KIQ.
       	 * gfx_v8_0_set_ring_funcs() installs it on adev->gfx.kiq.ring.
       	 * It reuses the compute get_wptr/set_wptr helpers and is the only
       	 * table here that sets emit_rreg.
       	 */
   7002	.type = AMDGPU_RING_TYPE_KIQ,
   7003	.align_mask = 0xff,
   7004	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   7005	.support_64bit_ptrs = false,
   7006	.get_rptr = gfx_v8_0_ring_get_rptr,
   7007	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
   7008	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
       	/*
       	 * NOTE(review): the budget below has terms for gds_switch,
       	 * hdp_flush, pipeline_sync and vm_flush, but this table sets none
       	 * of those callbacks; the vm_flush term is also a literal 17 rather
       	 * than the VI_FLUSH_GPU_TLB_NUM_WREG expression used by the compute
       	 * table. Confirm whether these terms are still needed.
       	 */
   7009	.emit_frame_size =
   7010		20 + /* gfx_v8_0_ring_emit_gds_switch */
   7011		7 + /* gfx_v8_0_ring_emit_hdp_flush */
   7012		5 + /* hdp_invalidate */
   7013		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
   7014		17 + /* gfx_v8_0_ring_emit_vm_flush */
   7015		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
       	/* NOTE(review): emit_ib_size is set although no .emit_ib callback is installed here. */
   7016	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
   7017	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
   7018	.test_ring = gfx_v8_0_ring_test_ring,
   7019	.insert_nop = amdgpu_ring_insert_nop,
   7020	.pad_ib = amdgpu_ring_generic_pad_ib,
   7021	.emit_rreg = gfx_v8_0_ring_emit_rreg,
   7022	.emit_wreg = gfx_v8_0_ring_emit_wreg,
   7023};
   7024
   7025static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
   7026{
       	/*
       	 * Attach the ring callback tables to the device's rings: the KIQ
       	 * table to the single KIQ ring, the GFX table to each of the
       	 * num_gfx_rings GFX rings and the compute table to each of the
       	 * num_compute_rings compute rings.
       	 */
   7027	int i;
   7028
   7029	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
   7030
   7031	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   7032		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
   7033
   7034	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   7035		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
   7036}
   7037
   7038static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
       	/* Callbacks for adev->gfx.eop_irq (installed by gfx_v8_0_set_irq_funcs()). */
   7039	.set = gfx_v8_0_set_eop_interrupt_state,
   7040	.process = gfx_v8_0_eop_irq,
   7041};
   7042
   7043static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
       	/* Callbacks for adev->gfx.priv_reg_irq (installed by gfx_v8_0_set_irq_funcs()). */
   7044	.set = gfx_v8_0_set_priv_reg_fault_state,
   7045	.process = gfx_v8_0_priv_reg_irq,
   7046};
   7047
   7048static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
       	/* Callbacks for adev->gfx.priv_inst_irq (installed by gfx_v8_0_set_irq_funcs()). */
   7049	.set = gfx_v8_0_set_priv_inst_fault_state,
   7050	.process = gfx_v8_0_priv_inst_irq,
   7051};
   7052
   7053static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
       	/* Callbacks for adev->gfx.cp_ecc_error_irq (installed by gfx_v8_0_set_irq_funcs()). */
   7054	.set = gfx_v8_0_set_cp_ecc_int_state,
   7055	.process = gfx_v8_0_cp_ecc_error_irq,
   7056};
   7057
   7058static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
       	/* Callbacks for adev->gfx.sq_irq (installed by gfx_v8_0_set_irq_funcs()). */
   7059	.set = gfx_v8_0_set_sq_int_state,
   7060	.process = gfx_v8_0_sq_irq,
   7061};
   7062
   7063static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
   7064{
       	/*
       	 * Wire each GFX interrupt source in adev->gfx to its callback table
       	 * and set how many interrupt types it exposes. The EOP source uses
       	 * AMDGPU_CP_IRQ_LAST types; every other source uses exactly one.
       	 */
   7065	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
   7066	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
   7067
   7068	adev->gfx.priv_reg_irq.num_types = 1;
   7069	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
   7070
   7071	adev->gfx.priv_inst_irq.num_types = 1;
   7072	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
   7073
   7074	adev->gfx.cp_ecc_error_irq.num_types = 1;
   7075	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
   7076
   7077	adev->gfx.sq_irq.num_types = 1;
   7078	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
   7079}
   7080
   7081static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
   7082{
       	/* Install iceland_rlc_funcs as the RLC callback table, unconditionally. */
   7083	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
   7084}
   7085
   7086static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
   7087{
   7088	/*
       	 * Init ASIC GDS info: gds_size and gds_compute_max_wave_id are read
       	 * from the mmGDS_VMID0_SIZE and mmGDS_COMPUTE_MAX_WAVE_ID registers,
       	 * while gws_size and oa_size are fixed at 64 and 16.
       	 */
   7089	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
   7090	adev->gds.gws_size = 64;
   7091	adev->gds.oa_size = 16;
   7092	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
   7093}
   7094
   7095static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
   7096						 u32 bitmap)
   7097{
       	/*
       	 * Write @bitmap into the INACTIVE_CUS field of
       	 * mmGC_USER_SHADER_ARRAY_CONFIG. A zero bitmap is a no-op: the
       	 * register is left untouched.
       	 */
   7098	u32 data;
   7099
   7100	if (!bitmap)
   7101		return;
   7102
       	/* Position the bitmap in the field and drop any bits that fall outside it. */
   7103	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
   7104	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
   7105
       	/* The value written holds only the INACTIVE_CUS bits; all other bits are 0. */
   7106	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
   7107}
   7108
   7109static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
   7110{
       	/*
       	 * Return a bitmap of active CUs: the complement of the INACTIVE_CUS
       	 * field, limited by a mask built from adev->gfx.config.max_cu_per_sh.
       	 */
   7111	u32 data, mask;
   7112
       	/* OR both registers so a CU marked inactive in either one counts as inactive. */
   7113	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
   7114		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
   7115
       	/* NOTE(review): presumably a mask of the low max_cu_per_sh bits - confirm. */
   7116	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
   7117
   7118	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
   7119}
   7120
   7121static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
   7122{
       	/*
       	 * Rebuild adev->gfx.cu_info from scratch: the active-CU bitmap and
       	 * ao_cu_bitmap for every shader engine (SE) / shader array (SH)
       	 * pair, the total active CU count, the packed ao_cu_mask and a set
       	 * of fixed per-CU parameters.
       	 */
   7123	int i, j, k, counter, active_cu_number = 0;
   7124	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
   7125	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
   7126	unsigned disable_masks[4 * 2];
   7127	u32 ao_cu_num;
   7128
   7129	memset(cu_info, 0, sizeof(*cu_info));
   7130
       	/* Cap on how many active CUs per SH are copied into ao_bitmap. */
   7131	if (adev->flags & AMD_IS_APU)
   7132		ao_cu_num = 2;
   7133	else
   7134		ao_cu_num = adev->gfx.config.max_cu_per_sh;
   7135
       	/*
       	 * disable_masks holds one entry per SE/SH pair for up to 4 SEs and
       	 * 2 SHs. NOTE(review): presumably filled from a user-supplied CU
       	 * disable setting - confirm in amdgpu_gfx_parse_disable_cu().
       	 */
   7136	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
   7137
       	/* The SE/SH selection below is done with grbm_idx_mutex held. */
   7138	mutex_lock(&adev->grbm_idx_mutex);
   7139	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   7140		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   7141			mask = 1;
   7142			ao_bitmap = 0;
   7143			counter = 0;
   7144			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
       			/* Bounds guard: keeps i * 2 + j inside disable_masks[4 * 2]. */
   7145			if (i < 4 && j < 2)
   7146				gfx_v8_0_set_user_cu_inactive_bitmap(
   7147					adev, disable_masks[i * 2 + j]);
   7148			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
   7149			cu_info->bitmap[i][j] = bitmap;
   7150
       			/*
       			 * Walk the low max_cu_per_sh bits of bitmap: counter
       			 * counts the set bits, and the first ao_cu_num of them
       			 * are mirrored into ao_bitmap.
       			 */
   7151			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
   7152				if (bitmap & mask) {
   7153					if (counter < ao_cu_num)
   7154						ao_bitmap |= mask;
   7155					counter ++;
   7156				}
   7157				mask <<= 1;
   7158			}
   7159			active_cu_number += counter;
       			/*
       			 * ao_cu_mask packs only SE 0-1 / SH 0-1, placing each
       			 * SH at bit offset i * 16 + j * 8.
       			 * NOTE(review): ao_bitmap is not masked to 8 bits before
       			 * the shift - confirm bits above 7 cannot be set here.
       			 */
   7160			if (i < 2 && j < 2)
   7161				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
   7162			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
   7163		}
   7164	}
       	/*
       	 * Re-select with 0xffffffff for all three selectors before dropping
       	 * the mutex. NOTE(review): presumably restores broadcast selection -
       	 * confirm in gfx_v8_0_select_se_sh().
       	 */
   7165	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   7166	mutex_unlock(&adev->grbm_idx_mutex);
   7167
   7168	cu_info->number = active_cu_number;
   7169	cu_info->ao_cu_mask = ao_cu_mask;
       	/* Fixed values, not read from hardware in this function. */
   7170	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
   7171	cu_info->max_waves_per_simd = 10;
   7172	cu_info->max_scratch_slots_per_cu = 32;
   7173	cu_info->wave_front_size = 64;
   7174	cu_info->lds_size = 64;
   7175}
   7176
   7177const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
   7178{
       	/* GFX IP block descriptor, version 8.0.0, backed by gfx_v8_0_ip_funcs. */
   7179	.type = AMD_IP_BLOCK_TYPE_GFX,
   7180	.major = 8,
   7181	.minor = 0,
   7182	.rev = 0,
   7183	.funcs = &gfx_v8_0_ip_funcs,
   7184};
   7185
   7186const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
   7187{
       	/*
       	 * GFX IP block descriptor, version 8.1.0. It uses the same
       	 * gfx_v8_0_ip_funcs table as gfx_v8_0_ip_block; only .minor differs.
       	 */
   7188	.type = AMD_IP_BLOCK_TYPE_GFX,
   7189	.major = 8,
   7190	.minor = 1,
   7191	.rev = 0,
   7192	.funcs = &gfx_v8_0_ip_funcs,
   7193};
   7194
   7195static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
   7196{
       	/*
       	 * Emit one PACKET3_WRITE_DATA packet on @ring that writes an
       	 * all-zero CE payload to the ce_payload slot of the metadata
       	 * structure located at amdgpu_csa_vaddr(). The chained-IB layout is
       	 * used when ring->adev->virt.chained_ib_support is set, the regular
       	 * layout otherwise.
       	 */
   7197	uint64_t ce_payload_addr;
   7198	int cnt_ce;
   7199	union {
   7200		struct vi_ce_ib_state regular;
   7201		struct vi_ce_ib_state_chained_ib chained;
   7202	} ce_payload = {};
   7203
       	/*
       	 * cnt_ce is the payload size in dwords plus 2 (written as + 4 - 2);
       	 * it is the count placed in the packet header below.
       	 */
   7204	if (ring->adev->virt.chained_ib_support) {
   7205		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
   7206			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
   7207		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
   7208	} else {
   7209		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
   7210			offsetof(struct vi_gfx_meta_data, ce_payload);
   7211		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
   7212	}
   7213
       	/* Packet layout: header, control dword, address low, address high, payload. */
   7214	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
   7215	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
   7216				WRITE_DATA_DST_SEL(8) |
   7217				WR_CONFIRM) |
   7218				WRITE_DATA_CACHE_POLICY(0));
   7219	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
   7220	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
       	/* cnt_ce - 2 equals the payload size in dwords for the chosen layout. */
   7221	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
   7222}
   7223
   7224static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
   7225{
       	/*
       	 * Emit one PACKET3_WRITE_DATA packet on @ring that writes the DE
       	 * payload to the de_payload slot of the metadata structure located
       	 * at amdgpu_csa_vaddr(). The payload is zero except for the GDS
       	 * backup address. The chained-IB layout is used when
       	 * ring->adev->virt.chained_ib_support is set, the regular layout
       	 * otherwise.
       	 */
   7226	uint64_t de_payload_addr, gds_addr, csa_addr;
   7227	int cnt_de;
   7228	union {
   7229		struct vi_de_ib_state regular;
   7230		struct vi_de_ib_state_chained_ib chained;
   7231	} de_payload = {};
   7232
   7233	csa_addr = amdgpu_csa_vaddr(ring->adev);
       	/* The GDS backup address is placed 4096 bytes past the CSA base address. */
   7234	gds_addr = csa_addr + 4096;
       	/*
       	 * cnt_de is the payload size in dwords plus 2 (written as + 4 - 2);
       	 * it is the count placed in the packet header below.
       	 */
   7235	if (ring->adev->virt.chained_ib_support) {
   7236		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
   7237		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
   7238		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
   7239		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
   7240	} else {
   7241		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
   7242		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
   7243		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
   7244		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
   7245	}
   7246
       	/*
       	 * Packet layout: header, control dword, address low, address high,
       	 * payload. The control dword uses WRITE_DATA_ENGINE_SEL(1), whereas
       	 * gfx_v8_0_ring_emit_ce_meta() uses WRITE_DATA_ENGINE_SEL(2).
       	 */
   7247	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
   7248	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
   7249				WRITE_DATA_DST_SEL(8) |
   7250				WR_CONFIRM) |
   7251				WRITE_DATA_CACHE_POLICY(0));
   7252	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
   7253	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
       	/* cnt_de - 2 equals the payload size in dwords for the chosen layout. */
   7254	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
   7255}