cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

gfx_v7_0.c (158112B)


      1/*
      2 * Copyright 2014 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 */
     23
     24#include <linux/firmware.h>
     25#include <linux/module.h>
     26
     27#include "amdgpu.h"
     28#include "amdgpu_ih.h"
     29#include "amdgpu_gfx.h"
     30#include "cikd.h"
     31#include "cik.h"
     32#include "cik_structs.h"
     33#include "atom.h"
     34#include "amdgpu_ucode.h"
     35#include "clearstate_ci.h"
     36
     37#include "dce/dce_8_0_d.h"
     38#include "dce/dce_8_0_sh_mask.h"
     39
     40#include "bif/bif_4_1_d.h"
     41#include "bif/bif_4_1_sh_mask.h"
     42
     43#include "gca/gfx_7_0_d.h"
     44#include "gca/gfx_7_2_enum.h"
     45#include "gca/gfx_7_2_sh_mask.h"
     46
     47#include "gmc/gmc_7_0_d.h"
     48#include "gmc/gmc_7_0_sh_mask.h"
     49
     50#include "oss/oss_2_0_d.h"
     51#include "oss/oss_2_0_sh_mask.h"
     52
     53#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */
     54
     55#define GFX7_NUM_GFX_RINGS     1
     56#define GFX7_MEC_HPD_SIZE      2048
     57
     58static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
     59static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
     60static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
     61
     62MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
     63MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
     64MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
     65MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
     66MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
     67
     68MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
     69MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
     70MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
     71MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
     72MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
     73
     74MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
     75MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
     76MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
     77MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
     78MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
     79MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
     80
     81MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
     82MODULE_FIRMWARE("amdgpu/kabini_me.bin");
     83MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
     84MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
     85MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
     86
     87MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
     88MODULE_FIRMWARE("amdgpu/mullins_me.bin");
     89MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
     90MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
     91MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
     92
     93static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
     94{
     95	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
     96	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
     97	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
     98	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
     99	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
    100	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
    101	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
    102	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
    103	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
    104	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
    105	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
    106	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
    107	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
    108	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
    109	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
    110	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
    111};
    112
    113static const u32 spectre_rlc_save_restore_register_list[] =
    114{
    115	(0x0e00 << 16) | (0xc12c >> 2),
    116	0x00000000,
    117	(0x0e00 << 16) | (0xc140 >> 2),
    118	0x00000000,
    119	(0x0e00 << 16) | (0xc150 >> 2),
    120	0x00000000,
    121	(0x0e00 << 16) | (0xc15c >> 2),
    122	0x00000000,
    123	(0x0e00 << 16) | (0xc168 >> 2),
    124	0x00000000,
    125	(0x0e00 << 16) | (0xc170 >> 2),
    126	0x00000000,
    127	(0x0e00 << 16) | (0xc178 >> 2),
    128	0x00000000,
    129	(0x0e00 << 16) | (0xc204 >> 2),
    130	0x00000000,
    131	(0x0e00 << 16) | (0xc2b4 >> 2),
    132	0x00000000,
    133	(0x0e00 << 16) | (0xc2b8 >> 2),
    134	0x00000000,
    135	(0x0e00 << 16) | (0xc2bc >> 2),
    136	0x00000000,
    137	(0x0e00 << 16) | (0xc2c0 >> 2),
    138	0x00000000,
    139	(0x0e00 << 16) | (0x8228 >> 2),
    140	0x00000000,
    141	(0x0e00 << 16) | (0x829c >> 2),
    142	0x00000000,
    143	(0x0e00 << 16) | (0x869c >> 2),
    144	0x00000000,
    145	(0x0600 << 16) | (0x98f4 >> 2),
    146	0x00000000,
    147	(0x0e00 << 16) | (0x98f8 >> 2),
    148	0x00000000,
    149	(0x0e00 << 16) | (0x9900 >> 2),
    150	0x00000000,
    151	(0x0e00 << 16) | (0xc260 >> 2),
    152	0x00000000,
    153	(0x0e00 << 16) | (0x90e8 >> 2),
    154	0x00000000,
    155	(0x0e00 << 16) | (0x3c000 >> 2),
    156	0x00000000,
    157	(0x0e00 << 16) | (0x3c00c >> 2),
    158	0x00000000,
    159	(0x0e00 << 16) | (0x8c1c >> 2),
    160	0x00000000,
    161	(0x0e00 << 16) | (0x9700 >> 2),
    162	0x00000000,
    163	(0x0e00 << 16) | (0xcd20 >> 2),
    164	0x00000000,
    165	(0x4e00 << 16) | (0xcd20 >> 2),
    166	0x00000000,
    167	(0x5e00 << 16) | (0xcd20 >> 2),
    168	0x00000000,
    169	(0x6e00 << 16) | (0xcd20 >> 2),
    170	0x00000000,
    171	(0x7e00 << 16) | (0xcd20 >> 2),
    172	0x00000000,
    173	(0x8e00 << 16) | (0xcd20 >> 2),
    174	0x00000000,
    175	(0x9e00 << 16) | (0xcd20 >> 2),
    176	0x00000000,
    177	(0xae00 << 16) | (0xcd20 >> 2),
    178	0x00000000,
    179	(0xbe00 << 16) | (0xcd20 >> 2),
    180	0x00000000,
    181	(0x0e00 << 16) | (0x89bc >> 2),
    182	0x00000000,
    183	(0x0e00 << 16) | (0x8900 >> 2),
    184	0x00000000,
    185	0x3,
    186	(0x0e00 << 16) | (0xc130 >> 2),
    187	0x00000000,
    188	(0x0e00 << 16) | (0xc134 >> 2),
    189	0x00000000,
    190	(0x0e00 << 16) | (0xc1fc >> 2),
    191	0x00000000,
    192	(0x0e00 << 16) | (0xc208 >> 2),
    193	0x00000000,
    194	(0x0e00 << 16) | (0xc264 >> 2),
    195	0x00000000,
    196	(0x0e00 << 16) | (0xc268 >> 2),
    197	0x00000000,
    198	(0x0e00 << 16) | (0xc26c >> 2),
    199	0x00000000,
    200	(0x0e00 << 16) | (0xc270 >> 2),
    201	0x00000000,
    202	(0x0e00 << 16) | (0xc274 >> 2),
    203	0x00000000,
    204	(0x0e00 << 16) | (0xc278 >> 2),
    205	0x00000000,
    206	(0x0e00 << 16) | (0xc27c >> 2),
    207	0x00000000,
    208	(0x0e00 << 16) | (0xc280 >> 2),
    209	0x00000000,
    210	(0x0e00 << 16) | (0xc284 >> 2),
    211	0x00000000,
    212	(0x0e00 << 16) | (0xc288 >> 2),
    213	0x00000000,
    214	(0x0e00 << 16) | (0xc28c >> 2),
    215	0x00000000,
    216	(0x0e00 << 16) | (0xc290 >> 2),
    217	0x00000000,
    218	(0x0e00 << 16) | (0xc294 >> 2),
    219	0x00000000,
    220	(0x0e00 << 16) | (0xc298 >> 2),
    221	0x00000000,
    222	(0x0e00 << 16) | (0xc29c >> 2),
    223	0x00000000,
    224	(0x0e00 << 16) | (0xc2a0 >> 2),
    225	0x00000000,
    226	(0x0e00 << 16) | (0xc2a4 >> 2),
    227	0x00000000,
    228	(0x0e00 << 16) | (0xc2a8 >> 2),
    229	0x00000000,
    230	(0x0e00 << 16) | (0xc2ac  >> 2),
    231	0x00000000,
    232	(0x0e00 << 16) | (0xc2b0 >> 2),
    233	0x00000000,
    234	(0x0e00 << 16) | (0x301d0 >> 2),
    235	0x00000000,
    236	(0x0e00 << 16) | (0x30238 >> 2),
    237	0x00000000,
    238	(0x0e00 << 16) | (0x30250 >> 2),
    239	0x00000000,
    240	(0x0e00 << 16) | (0x30254 >> 2),
    241	0x00000000,
    242	(0x0e00 << 16) | (0x30258 >> 2),
    243	0x00000000,
    244	(0x0e00 << 16) | (0x3025c >> 2),
    245	0x00000000,
    246	(0x4e00 << 16) | (0xc900 >> 2),
    247	0x00000000,
    248	(0x5e00 << 16) | (0xc900 >> 2),
    249	0x00000000,
    250	(0x6e00 << 16) | (0xc900 >> 2),
    251	0x00000000,
    252	(0x7e00 << 16) | (0xc900 >> 2),
    253	0x00000000,
    254	(0x8e00 << 16) | (0xc900 >> 2),
    255	0x00000000,
    256	(0x9e00 << 16) | (0xc900 >> 2),
    257	0x00000000,
    258	(0xae00 << 16) | (0xc900 >> 2),
    259	0x00000000,
    260	(0xbe00 << 16) | (0xc900 >> 2),
    261	0x00000000,
    262	(0x4e00 << 16) | (0xc904 >> 2),
    263	0x00000000,
    264	(0x5e00 << 16) | (0xc904 >> 2),
    265	0x00000000,
    266	(0x6e00 << 16) | (0xc904 >> 2),
    267	0x00000000,
    268	(0x7e00 << 16) | (0xc904 >> 2),
    269	0x00000000,
    270	(0x8e00 << 16) | (0xc904 >> 2),
    271	0x00000000,
    272	(0x9e00 << 16) | (0xc904 >> 2),
    273	0x00000000,
    274	(0xae00 << 16) | (0xc904 >> 2),
    275	0x00000000,
    276	(0xbe00 << 16) | (0xc904 >> 2),
    277	0x00000000,
    278	(0x4e00 << 16) | (0xc908 >> 2),
    279	0x00000000,
    280	(0x5e00 << 16) | (0xc908 >> 2),
    281	0x00000000,
    282	(0x6e00 << 16) | (0xc908 >> 2),
    283	0x00000000,
    284	(0x7e00 << 16) | (0xc908 >> 2),
    285	0x00000000,
    286	(0x8e00 << 16) | (0xc908 >> 2),
    287	0x00000000,
    288	(0x9e00 << 16) | (0xc908 >> 2),
    289	0x00000000,
    290	(0xae00 << 16) | (0xc908 >> 2),
    291	0x00000000,
    292	(0xbe00 << 16) | (0xc908 >> 2),
    293	0x00000000,
    294	(0x4e00 << 16) | (0xc90c >> 2),
    295	0x00000000,
    296	(0x5e00 << 16) | (0xc90c >> 2),
    297	0x00000000,
    298	(0x6e00 << 16) | (0xc90c >> 2),
    299	0x00000000,
    300	(0x7e00 << 16) | (0xc90c >> 2),
    301	0x00000000,
    302	(0x8e00 << 16) | (0xc90c >> 2),
    303	0x00000000,
    304	(0x9e00 << 16) | (0xc90c >> 2),
    305	0x00000000,
    306	(0xae00 << 16) | (0xc90c >> 2),
    307	0x00000000,
    308	(0xbe00 << 16) | (0xc90c >> 2),
    309	0x00000000,
    310	(0x4e00 << 16) | (0xc910 >> 2),
    311	0x00000000,
    312	(0x5e00 << 16) | (0xc910 >> 2),
    313	0x00000000,
    314	(0x6e00 << 16) | (0xc910 >> 2),
    315	0x00000000,
    316	(0x7e00 << 16) | (0xc910 >> 2),
    317	0x00000000,
    318	(0x8e00 << 16) | (0xc910 >> 2),
    319	0x00000000,
    320	(0x9e00 << 16) | (0xc910 >> 2),
    321	0x00000000,
    322	(0xae00 << 16) | (0xc910 >> 2),
    323	0x00000000,
    324	(0xbe00 << 16) | (0xc910 >> 2),
    325	0x00000000,
    326	(0x0e00 << 16) | (0xc99c >> 2),
    327	0x00000000,
    328	(0x0e00 << 16) | (0x9834 >> 2),
    329	0x00000000,
    330	(0x0000 << 16) | (0x30f00 >> 2),
    331	0x00000000,
    332	(0x0001 << 16) | (0x30f00 >> 2),
    333	0x00000000,
    334	(0x0000 << 16) | (0x30f04 >> 2),
    335	0x00000000,
    336	(0x0001 << 16) | (0x30f04 >> 2),
    337	0x00000000,
    338	(0x0000 << 16) | (0x30f08 >> 2),
    339	0x00000000,
    340	(0x0001 << 16) | (0x30f08 >> 2),
    341	0x00000000,
    342	(0x0000 << 16) | (0x30f0c >> 2),
    343	0x00000000,
    344	(0x0001 << 16) | (0x30f0c >> 2),
    345	0x00000000,
    346	(0x0600 << 16) | (0x9b7c >> 2),
    347	0x00000000,
    348	(0x0e00 << 16) | (0x8a14 >> 2),
    349	0x00000000,
    350	(0x0e00 << 16) | (0x8a18 >> 2),
    351	0x00000000,
    352	(0x0600 << 16) | (0x30a00 >> 2),
    353	0x00000000,
    354	(0x0e00 << 16) | (0x8bf0 >> 2),
    355	0x00000000,
    356	(0x0e00 << 16) | (0x8bcc >> 2),
    357	0x00000000,
    358	(0x0e00 << 16) | (0x8b24 >> 2),
    359	0x00000000,
    360	(0x0e00 << 16) | (0x30a04 >> 2),
    361	0x00000000,
    362	(0x0600 << 16) | (0x30a10 >> 2),
    363	0x00000000,
    364	(0x0600 << 16) | (0x30a14 >> 2),
    365	0x00000000,
    366	(0x0600 << 16) | (0x30a18 >> 2),
    367	0x00000000,
    368	(0x0600 << 16) | (0x30a2c >> 2),
    369	0x00000000,
    370	(0x0e00 << 16) | (0xc700 >> 2),
    371	0x00000000,
    372	(0x0e00 << 16) | (0xc704 >> 2),
    373	0x00000000,
    374	(0x0e00 << 16) | (0xc708 >> 2),
    375	0x00000000,
    376	(0x0e00 << 16) | (0xc768 >> 2),
    377	0x00000000,
    378	(0x0400 << 16) | (0xc770 >> 2),
    379	0x00000000,
    380	(0x0400 << 16) | (0xc774 >> 2),
    381	0x00000000,
    382	(0x0400 << 16) | (0xc778 >> 2),
    383	0x00000000,
    384	(0x0400 << 16) | (0xc77c >> 2),
    385	0x00000000,
    386	(0x0400 << 16) | (0xc780 >> 2),
    387	0x00000000,
    388	(0x0400 << 16) | (0xc784 >> 2),
    389	0x00000000,
    390	(0x0400 << 16) | (0xc788 >> 2),
    391	0x00000000,
    392	(0x0400 << 16) | (0xc78c >> 2),
    393	0x00000000,
    394	(0x0400 << 16) | (0xc798 >> 2),
    395	0x00000000,
    396	(0x0400 << 16) | (0xc79c >> 2),
    397	0x00000000,
    398	(0x0400 << 16) | (0xc7a0 >> 2),
    399	0x00000000,
    400	(0x0400 << 16) | (0xc7a4 >> 2),
    401	0x00000000,
    402	(0x0400 << 16) | (0xc7a8 >> 2),
    403	0x00000000,
    404	(0x0400 << 16) | (0xc7ac >> 2),
    405	0x00000000,
    406	(0x0400 << 16) | (0xc7b0 >> 2),
    407	0x00000000,
    408	(0x0400 << 16) | (0xc7b4 >> 2),
    409	0x00000000,
    410	(0x0e00 << 16) | (0x9100 >> 2),
    411	0x00000000,
    412	(0x0e00 << 16) | (0x3c010 >> 2),
    413	0x00000000,
    414	(0x0e00 << 16) | (0x92a8 >> 2),
    415	0x00000000,
    416	(0x0e00 << 16) | (0x92ac >> 2),
    417	0x00000000,
    418	(0x0e00 << 16) | (0x92b4 >> 2),
    419	0x00000000,
    420	(0x0e00 << 16) | (0x92b8 >> 2),
    421	0x00000000,
    422	(0x0e00 << 16) | (0x92bc >> 2),
    423	0x00000000,
    424	(0x0e00 << 16) | (0x92c0 >> 2),
    425	0x00000000,
    426	(0x0e00 << 16) | (0x92c4 >> 2),
    427	0x00000000,
    428	(0x0e00 << 16) | (0x92c8 >> 2),
    429	0x00000000,
    430	(0x0e00 << 16) | (0x92cc >> 2),
    431	0x00000000,
    432	(0x0e00 << 16) | (0x92d0 >> 2),
    433	0x00000000,
    434	(0x0e00 << 16) | (0x8c00 >> 2),
    435	0x00000000,
    436	(0x0e00 << 16) | (0x8c04 >> 2),
    437	0x00000000,
    438	(0x0e00 << 16) | (0x8c20 >> 2),
    439	0x00000000,
    440	(0x0e00 << 16) | (0x8c38 >> 2),
    441	0x00000000,
    442	(0x0e00 << 16) | (0x8c3c >> 2),
    443	0x00000000,
    444	(0x0e00 << 16) | (0xae00 >> 2),
    445	0x00000000,
    446	(0x0e00 << 16) | (0x9604 >> 2),
    447	0x00000000,
    448	(0x0e00 << 16) | (0xac08 >> 2),
    449	0x00000000,
    450	(0x0e00 << 16) | (0xac0c >> 2),
    451	0x00000000,
    452	(0x0e00 << 16) | (0xac10 >> 2),
    453	0x00000000,
    454	(0x0e00 << 16) | (0xac14 >> 2),
    455	0x00000000,
    456	(0x0e00 << 16) | (0xac58 >> 2),
    457	0x00000000,
    458	(0x0e00 << 16) | (0xac68 >> 2),
    459	0x00000000,
    460	(0x0e00 << 16) | (0xac6c >> 2),
    461	0x00000000,
    462	(0x0e00 << 16) | (0xac70 >> 2),
    463	0x00000000,
    464	(0x0e00 << 16) | (0xac74 >> 2),
    465	0x00000000,
    466	(0x0e00 << 16) | (0xac78 >> 2),
    467	0x00000000,
    468	(0x0e00 << 16) | (0xac7c >> 2),
    469	0x00000000,
    470	(0x0e00 << 16) | (0xac80 >> 2),
    471	0x00000000,
    472	(0x0e00 << 16) | (0xac84 >> 2),
    473	0x00000000,
    474	(0x0e00 << 16) | (0xac88 >> 2),
    475	0x00000000,
    476	(0x0e00 << 16) | (0xac8c >> 2),
    477	0x00000000,
    478	(0x0e00 << 16) | (0x970c >> 2),
    479	0x00000000,
    480	(0x0e00 << 16) | (0x9714 >> 2),
    481	0x00000000,
    482	(0x0e00 << 16) | (0x9718 >> 2),
    483	0x00000000,
    484	(0x0e00 << 16) | (0x971c >> 2),
    485	0x00000000,
    486	(0x0e00 << 16) | (0x31068 >> 2),
    487	0x00000000,
    488	(0x4e00 << 16) | (0x31068 >> 2),
    489	0x00000000,
    490	(0x5e00 << 16) | (0x31068 >> 2),
    491	0x00000000,
    492	(0x6e00 << 16) | (0x31068 >> 2),
    493	0x00000000,
    494	(0x7e00 << 16) | (0x31068 >> 2),
    495	0x00000000,
    496	(0x8e00 << 16) | (0x31068 >> 2),
    497	0x00000000,
    498	(0x9e00 << 16) | (0x31068 >> 2),
    499	0x00000000,
    500	(0xae00 << 16) | (0x31068 >> 2),
    501	0x00000000,
    502	(0xbe00 << 16) | (0x31068 >> 2),
    503	0x00000000,
    504	(0x0e00 << 16) | (0xcd10 >> 2),
    505	0x00000000,
    506	(0x0e00 << 16) | (0xcd14 >> 2),
    507	0x00000000,
    508	(0x0e00 << 16) | (0x88b0 >> 2),
    509	0x00000000,
    510	(0x0e00 << 16) | (0x88b4 >> 2),
    511	0x00000000,
    512	(0x0e00 << 16) | (0x88b8 >> 2),
    513	0x00000000,
    514	(0x0e00 << 16) | (0x88bc >> 2),
    515	0x00000000,
    516	(0x0400 << 16) | (0x89c0 >> 2),
    517	0x00000000,
    518	(0x0e00 << 16) | (0x88c4 >> 2),
    519	0x00000000,
    520	(0x0e00 << 16) | (0x88c8 >> 2),
    521	0x00000000,
    522	(0x0e00 << 16) | (0x88d0 >> 2),
    523	0x00000000,
    524	(0x0e00 << 16) | (0x88d4 >> 2),
    525	0x00000000,
    526	(0x0e00 << 16) | (0x88d8 >> 2),
    527	0x00000000,
    528	(0x0e00 << 16) | (0x8980 >> 2),
    529	0x00000000,
    530	(0x0e00 << 16) | (0x30938 >> 2),
    531	0x00000000,
    532	(0x0e00 << 16) | (0x3093c >> 2),
    533	0x00000000,
    534	(0x0e00 << 16) | (0x30940 >> 2),
    535	0x00000000,
    536	(0x0e00 << 16) | (0x89a0 >> 2),
    537	0x00000000,
    538	(0x0e00 << 16) | (0x30900 >> 2),
    539	0x00000000,
    540	(0x0e00 << 16) | (0x30904 >> 2),
    541	0x00000000,
    542	(0x0e00 << 16) | (0x89b4 >> 2),
    543	0x00000000,
    544	(0x0e00 << 16) | (0x3c210 >> 2),
    545	0x00000000,
    546	(0x0e00 << 16) | (0x3c214 >> 2),
    547	0x00000000,
    548	(0x0e00 << 16) | (0x3c218 >> 2),
    549	0x00000000,
    550	(0x0e00 << 16) | (0x8904 >> 2),
    551	0x00000000,
    552	0x5,
    553	(0x0e00 << 16) | (0x8c28 >> 2),
    554	(0x0e00 << 16) | (0x8c2c >> 2),
    555	(0x0e00 << 16) | (0x8c30 >> 2),
    556	(0x0e00 << 16) | (0x8c34 >> 2),
    557	(0x0e00 << 16) | (0x9600 >> 2),
    558};
    559
    560static const u32 kalindi_rlc_save_restore_register_list[] =
    561{
    562	(0x0e00 << 16) | (0xc12c >> 2),
    563	0x00000000,
    564	(0x0e00 << 16) | (0xc140 >> 2),
    565	0x00000000,
    566	(0x0e00 << 16) | (0xc150 >> 2),
    567	0x00000000,
    568	(0x0e00 << 16) | (0xc15c >> 2),
    569	0x00000000,
    570	(0x0e00 << 16) | (0xc168 >> 2),
    571	0x00000000,
    572	(0x0e00 << 16) | (0xc170 >> 2),
    573	0x00000000,
    574	(0x0e00 << 16) | (0xc204 >> 2),
    575	0x00000000,
    576	(0x0e00 << 16) | (0xc2b4 >> 2),
    577	0x00000000,
    578	(0x0e00 << 16) | (0xc2b8 >> 2),
    579	0x00000000,
    580	(0x0e00 << 16) | (0xc2bc >> 2),
    581	0x00000000,
    582	(0x0e00 << 16) | (0xc2c0 >> 2),
    583	0x00000000,
    584	(0x0e00 << 16) | (0x8228 >> 2),
    585	0x00000000,
    586	(0x0e00 << 16) | (0x829c >> 2),
    587	0x00000000,
    588	(0x0e00 << 16) | (0x869c >> 2),
    589	0x00000000,
    590	(0x0600 << 16) | (0x98f4 >> 2),
    591	0x00000000,
    592	(0x0e00 << 16) | (0x98f8 >> 2),
    593	0x00000000,
    594	(0x0e00 << 16) | (0x9900 >> 2),
    595	0x00000000,
    596	(0x0e00 << 16) | (0xc260 >> 2),
    597	0x00000000,
    598	(0x0e00 << 16) | (0x90e8 >> 2),
    599	0x00000000,
    600	(0x0e00 << 16) | (0x3c000 >> 2),
    601	0x00000000,
    602	(0x0e00 << 16) | (0x3c00c >> 2),
    603	0x00000000,
    604	(0x0e00 << 16) | (0x8c1c >> 2),
    605	0x00000000,
    606	(0x0e00 << 16) | (0x9700 >> 2),
    607	0x00000000,
    608	(0x0e00 << 16) | (0xcd20 >> 2),
    609	0x00000000,
    610	(0x4e00 << 16) | (0xcd20 >> 2),
    611	0x00000000,
    612	(0x5e00 << 16) | (0xcd20 >> 2),
    613	0x00000000,
    614	(0x6e00 << 16) | (0xcd20 >> 2),
    615	0x00000000,
    616	(0x7e00 << 16) | (0xcd20 >> 2),
    617	0x00000000,
    618	(0x0e00 << 16) | (0x89bc >> 2),
    619	0x00000000,
    620	(0x0e00 << 16) | (0x8900 >> 2),
    621	0x00000000,
    622	0x3,
    623	(0x0e00 << 16) | (0xc130 >> 2),
    624	0x00000000,
    625	(0x0e00 << 16) | (0xc134 >> 2),
    626	0x00000000,
    627	(0x0e00 << 16) | (0xc1fc >> 2),
    628	0x00000000,
    629	(0x0e00 << 16) | (0xc208 >> 2),
    630	0x00000000,
    631	(0x0e00 << 16) | (0xc264 >> 2),
    632	0x00000000,
    633	(0x0e00 << 16) | (0xc268 >> 2),
    634	0x00000000,
    635	(0x0e00 << 16) | (0xc26c >> 2),
    636	0x00000000,
    637	(0x0e00 << 16) | (0xc270 >> 2),
    638	0x00000000,
    639	(0x0e00 << 16) | (0xc274 >> 2),
    640	0x00000000,
    641	(0x0e00 << 16) | (0xc28c >> 2),
    642	0x00000000,
    643	(0x0e00 << 16) | (0xc290 >> 2),
    644	0x00000000,
    645	(0x0e00 << 16) | (0xc294 >> 2),
    646	0x00000000,
    647	(0x0e00 << 16) | (0xc298 >> 2),
    648	0x00000000,
    649	(0x0e00 << 16) | (0xc2a0 >> 2),
    650	0x00000000,
    651	(0x0e00 << 16) | (0xc2a4 >> 2),
    652	0x00000000,
    653	(0x0e00 << 16) | (0xc2a8 >> 2),
    654	0x00000000,
    655	(0x0e00 << 16) | (0xc2ac >> 2),
    656	0x00000000,
    657	(0x0e00 << 16) | (0x301d0 >> 2),
    658	0x00000000,
    659	(0x0e00 << 16) | (0x30238 >> 2),
    660	0x00000000,
    661	(0x0e00 << 16) | (0x30250 >> 2),
    662	0x00000000,
    663	(0x0e00 << 16) | (0x30254 >> 2),
    664	0x00000000,
    665	(0x0e00 << 16) | (0x30258 >> 2),
    666	0x00000000,
    667	(0x0e00 << 16) | (0x3025c >> 2),
    668	0x00000000,
    669	(0x4e00 << 16) | (0xc900 >> 2),
    670	0x00000000,
    671	(0x5e00 << 16) | (0xc900 >> 2),
    672	0x00000000,
    673	(0x6e00 << 16) | (0xc900 >> 2),
    674	0x00000000,
    675	(0x7e00 << 16) | (0xc900 >> 2),
    676	0x00000000,
    677	(0x4e00 << 16) | (0xc904 >> 2),
    678	0x00000000,
    679	(0x5e00 << 16) | (0xc904 >> 2),
    680	0x00000000,
    681	(0x6e00 << 16) | (0xc904 >> 2),
    682	0x00000000,
    683	(0x7e00 << 16) | (0xc904 >> 2),
    684	0x00000000,
    685	(0x4e00 << 16) | (0xc908 >> 2),
    686	0x00000000,
    687	(0x5e00 << 16) | (0xc908 >> 2),
    688	0x00000000,
    689	(0x6e00 << 16) | (0xc908 >> 2),
    690	0x00000000,
    691	(0x7e00 << 16) | (0xc908 >> 2),
    692	0x00000000,
    693	(0x4e00 << 16) | (0xc90c >> 2),
    694	0x00000000,
    695	(0x5e00 << 16) | (0xc90c >> 2),
    696	0x00000000,
    697	(0x6e00 << 16) | (0xc90c >> 2),
    698	0x00000000,
    699	(0x7e00 << 16) | (0xc90c >> 2),
    700	0x00000000,
    701	(0x4e00 << 16) | (0xc910 >> 2),
    702	0x00000000,
    703	(0x5e00 << 16) | (0xc910 >> 2),
    704	0x00000000,
    705	(0x6e00 << 16) | (0xc910 >> 2),
    706	0x00000000,
    707	(0x7e00 << 16) | (0xc910 >> 2),
    708	0x00000000,
    709	(0x0e00 << 16) | (0xc99c >> 2),
    710	0x00000000,
    711	(0x0e00 << 16) | (0x9834 >> 2),
    712	0x00000000,
    713	(0x0000 << 16) | (0x30f00 >> 2),
    714	0x00000000,
    715	(0x0000 << 16) | (0x30f04 >> 2),
    716	0x00000000,
    717	(0x0000 << 16) | (0x30f08 >> 2),
    718	0x00000000,
    719	(0x0000 << 16) | (0x30f0c >> 2),
    720	0x00000000,
    721	(0x0600 << 16) | (0x9b7c >> 2),
    722	0x00000000,
    723	(0x0e00 << 16) | (0x8a14 >> 2),
    724	0x00000000,
    725	(0x0e00 << 16) | (0x8a18 >> 2),
    726	0x00000000,
    727	(0x0600 << 16) | (0x30a00 >> 2),
    728	0x00000000,
    729	(0x0e00 << 16) | (0x8bf0 >> 2),
    730	0x00000000,
    731	(0x0e00 << 16) | (0x8bcc >> 2),
    732	0x00000000,
    733	(0x0e00 << 16) | (0x8b24 >> 2),
    734	0x00000000,
    735	(0x0e00 << 16) | (0x30a04 >> 2),
    736	0x00000000,
    737	(0x0600 << 16) | (0x30a10 >> 2),
    738	0x00000000,
    739	(0x0600 << 16) | (0x30a14 >> 2),
    740	0x00000000,
    741	(0x0600 << 16) | (0x30a18 >> 2),
    742	0x00000000,
    743	(0x0600 << 16) | (0x30a2c >> 2),
    744	0x00000000,
    745	(0x0e00 << 16) | (0xc700 >> 2),
    746	0x00000000,
    747	(0x0e00 << 16) | (0xc704 >> 2),
    748	0x00000000,
    749	(0x0e00 << 16) | (0xc708 >> 2),
    750	0x00000000,
    751	(0x0e00 << 16) | (0xc768 >> 2),
    752	0x00000000,
    753	(0x0400 << 16) | (0xc770 >> 2),
    754	0x00000000,
    755	(0x0400 << 16) | (0xc774 >> 2),
    756	0x00000000,
    757	(0x0400 << 16) | (0xc798 >> 2),
    758	0x00000000,
    759	(0x0400 << 16) | (0xc79c >> 2),
    760	0x00000000,
    761	(0x0e00 << 16) | (0x9100 >> 2),
    762	0x00000000,
    763	(0x0e00 << 16) | (0x3c010 >> 2),
    764	0x00000000,
    765	(0x0e00 << 16) | (0x8c00 >> 2),
    766	0x00000000,
    767	(0x0e00 << 16) | (0x8c04 >> 2),
    768	0x00000000,
    769	(0x0e00 << 16) | (0x8c20 >> 2),
    770	0x00000000,
    771	(0x0e00 << 16) | (0x8c38 >> 2),
    772	0x00000000,
    773	(0x0e00 << 16) | (0x8c3c >> 2),
    774	0x00000000,
    775	(0x0e00 << 16) | (0xae00 >> 2),
    776	0x00000000,
    777	(0x0e00 << 16) | (0x9604 >> 2),
    778	0x00000000,
    779	(0x0e00 << 16) | (0xac08 >> 2),
    780	0x00000000,
    781	(0x0e00 << 16) | (0xac0c >> 2),
    782	0x00000000,
    783	(0x0e00 << 16) | (0xac10 >> 2),
    784	0x00000000,
    785	(0x0e00 << 16) | (0xac14 >> 2),
    786	0x00000000,
    787	(0x0e00 << 16) | (0xac58 >> 2),
    788	0x00000000,
    789	(0x0e00 << 16) | (0xac68 >> 2),
    790	0x00000000,
    791	(0x0e00 << 16) | (0xac6c >> 2),
    792	0x00000000,
    793	(0x0e00 << 16) | (0xac70 >> 2),
    794	0x00000000,
    795	(0x0e00 << 16) | (0xac74 >> 2),
    796	0x00000000,
    797	(0x0e00 << 16) | (0xac78 >> 2),
    798	0x00000000,
    799	(0x0e00 << 16) | (0xac7c >> 2),
    800	0x00000000,
    801	(0x0e00 << 16) | (0xac80 >> 2),
    802	0x00000000,
    803	(0x0e00 << 16) | (0xac84 >> 2),
    804	0x00000000,
    805	(0x0e00 << 16) | (0xac88 >> 2),
    806	0x00000000,
    807	(0x0e00 << 16) | (0xac8c >> 2),
    808	0x00000000,
    809	(0x0e00 << 16) | (0x970c >> 2),
    810	0x00000000,
    811	(0x0e00 << 16) | (0x9714 >> 2),
    812	0x00000000,
    813	(0x0e00 << 16) | (0x9718 >> 2),
    814	0x00000000,
    815	(0x0e00 << 16) | (0x971c >> 2),
    816	0x00000000,
    817	(0x0e00 << 16) | (0x31068 >> 2),
    818	0x00000000,
    819	(0x4e00 << 16) | (0x31068 >> 2),
    820	0x00000000,
    821	(0x5e00 << 16) | (0x31068 >> 2),
    822	0x00000000,
    823	(0x6e00 << 16) | (0x31068 >> 2),
    824	0x00000000,
    825	(0x7e00 << 16) | (0x31068 >> 2),
    826	0x00000000,
    827	(0x0e00 << 16) | (0xcd10 >> 2),
    828	0x00000000,
    829	(0x0e00 << 16) | (0xcd14 >> 2),
    830	0x00000000,
    831	(0x0e00 << 16) | (0x88b0 >> 2),
    832	0x00000000,
    833	(0x0e00 << 16) | (0x88b4 >> 2),
    834	0x00000000,
    835	(0x0e00 << 16) | (0x88b8 >> 2),
    836	0x00000000,
    837	(0x0e00 << 16) | (0x88bc >> 2),
    838	0x00000000,
    839	(0x0400 << 16) | (0x89c0 >> 2),
    840	0x00000000,
    841	(0x0e00 << 16) | (0x88c4 >> 2),
    842	0x00000000,
    843	(0x0e00 << 16) | (0x88c8 >> 2),
    844	0x00000000,
    845	(0x0e00 << 16) | (0x88d0 >> 2),
    846	0x00000000,
    847	(0x0e00 << 16) | (0x88d4 >> 2),
    848	0x00000000,
    849	(0x0e00 << 16) | (0x88d8 >> 2),
    850	0x00000000,
    851	(0x0e00 << 16) | (0x8980 >> 2),
    852	0x00000000,
    853	(0x0e00 << 16) | (0x30938 >> 2),
    854	0x00000000,
    855	(0x0e00 << 16) | (0x3093c >> 2),
    856	0x00000000,
    857	(0x0e00 << 16) | (0x30940 >> 2),
    858	0x00000000,
    859	(0x0e00 << 16) | (0x89a0 >> 2),
    860	0x00000000,
    861	(0x0e00 << 16) | (0x30900 >> 2),
    862	0x00000000,
    863	(0x0e00 << 16) | (0x30904 >> 2),
    864	0x00000000,
    865	(0x0e00 << 16) | (0x89b4 >> 2),
    866	0x00000000,
    867	(0x0e00 << 16) | (0x3e1fc >> 2),
    868	0x00000000,
    869	(0x0e00 << 16) | (0x3c210 >> 2),
    870	0x00000000,
    871	(0x0e00 << 16) | (0x3c214 >> 2),
    872	0x00000000,
    873	(0x0e00 << 16) | (0x3c218 >> 2),
    874	0x00000000,
    875	(0x0e00 << 16) | (0x8904 >> 2),
    876	0x00000000,
    877	0x5,
    878	(0x0e00 << 16) | (0x8c28 >> 2),
    879	(0x0e00 << 16) | (0x8c2c >> 2),
    880	(0x0e00 << 16) | (0x8c30 >> 2),
    881	(0x0e00 << 16) | (0x8c34 >> 2),
    882	(0x0e00 << 16) | (0x9600 >> 2),
    883};
    884
    885static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
    886static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
    887static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
    888static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
    889
    890/*
    891 * Core functions
    892 */
    893/**
    894 * gfx_v7_0_init_microcode - load ucode images from disk
    895 *
    896 * @adev: amdgpu_device pointer
    897 *
    898 * Use the firmware interface to load the ucode images into
    899 * the driver (not loaded into hw).
    900 * Returns 0 on success, error on failure.
    901 */
    902static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
    903{
    904	const char *chip_name;
    905	char fw_name[30];
    906	int err;
    907
    908	DRM_DEBUG("\n");
    909
    910	switch (adev->asic_type) {
    911	case CHIP_BONAIRE:
    912		chip_name = "bonaire";
    913		break;
    914	case CHIP_HAWAII:
    915		chip_name = "hawaii";
    916		break;
    917	case CHIP_KAVERI:
    918		chip_name = "kaveri";
    919		break;
    920	case CHIP_KABINI:
    921		chip_name = "kabini";
    922		break;
    923	case CHIP_MULLINS:
    924		chip_name = "mullins";
    925		break;
    926	default: BUG();
    927	}
    928
    929	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
    930	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
    931	if (err)
    932		goto out;
    933	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
    934	if (err)
    935		goto out;
    936
    937	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
    938	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
    939	if (err)
    940		goto out;
    941	err = amdgpu_ucode_validate(adev->gfx.me_fw);
    942	if (err)
    943		goto out;
    944
    945	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
    946	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
    947	if (err)
    948		goto out;
    949	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
    950	if (err)
    951		goto out;
    952
    953	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
    954	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
    955	if (err)
    956		goto out;
    957	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
    958	if (err)
    959		goto out;
    960
    961	if (adev->asic_type == CHIP_KAVERI) {
    962		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
    963		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
    964		if (err)
    965			goto out;
    966		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
    967		if (err)
    968			goto out;
    969	}
    970
    971	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
    972	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
    973	if (err)
    974		goto out;
    975	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
    976
    977out:
    978	if (err) {
    979		pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
    980		release_firmware(adev->gfx.pfp_fw);
    981		adev->gfx.pfp_fw = NULL;
    982		release_firmware(adev->gfx.me_fw);
    983		adev->gfx.me_fw = NULL;
    984		release_firmware(adev->gfx.ce_fw);
    985		adev->gfx.ce_fw = NULL;
    986		release_firmware(adev->gfx.mec_fw);
    987		adev->gfx.mec_fw = NULL;
    988		release_firmware(adev->gfx.mec2_fw);
    989		adev->gfx.mec2_fw = NULL;
    990		release_firmware(adev->gfx.rlc_fw);
    991		adev->gfx.rlc_fw = NULL;
    992	}
    993	return err;
    994}
    995
    996static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
    997{
    998	release_firmware(adev->gfx.pfp_fw);
    999	adev->gfx.pfp_fw = NULL;
   1000	release_firmware(adev->gfx.me_fw);
   1001	adev->gfx.me_fw = NULL;
   1002	release_firmware(adev->gfx.ce_fw);
   1003	adev->gfx.ce_fw = NULL;
   1004	release_firmware(adev->gfx.mec_fw);
   1005	adev->gfx.mec_fw = NULL;
   1006	release_firmware(adev->gfx.mec2_fw);
   1007	adev->gfx.mec2_fw = NULL;
   1008	release_firmware(adev->gfx.rlc_fw);
   1009	adev->gfx.rlc_fw = NULL;
   1010}
   1011
   1012/**
   1013 * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
   1014 *
   1015 * @adev: amdgpu_device pointer
   1016 *
   1017 * Starting with SI, the tiling setup is done globally in a
   1018 * set of 32 tiling modes.  Rather than selecting each set of
   1019 * parameters per surface as on older asics, we just select
   1020 * which index in the tiling table we want to use, and the
   1021 * surface uses those parameters (CIK).
   1022 */
   1023static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
   1024{
   1025	const u32 num_tile_mode_states =
   1026			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
   1027	const u32 num_secondary_tile_mode_states =
   1028			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
   1029	u32 reg_offset, split_equal_to_row_size;
   1030	uint32_t *tile, *macrotile;
   1031
   1032	tile = adev->gfx.config.tile_mode_array;
   1033	macrotile = adev->gfx.config.macrotile_mode_array;
   1034
   1035	switch (adev->gfx.config.mem_row_size_in_kb) {
   1036	case 1:
   1037		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
   1038		break;
   1039	case 2:
   1040	default:
   1041		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
   1042		break;
   1043	case 4:
   1044		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
   1045		break;
   1046	}
   1047
   1048	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   1049		tile[reg_offset] = 0;
   1050	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   1051		macrotile[reg_offset] = 0;
   1052
   1053	switch (adev->asic_type) {
   1054	case CHIP_BONAIRE:
   1055		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1056			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1057			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   1058			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1059		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1060			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1061			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   1062			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1063		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1064			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1065			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   1066			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1067		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1068			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1069			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   1070			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1071		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1072			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1073			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1074			   TILE_SPLIT(split_equal_to_row_size));
   1075		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1076			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1077			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1078		tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1079			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1080			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1081			   TILE_SPLIT(split_equal_to_row_size));
   1082		tile[7] = (TILE_SPLIT(split_equal_to_row_size));
   1083		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   1084			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
   1085		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1086			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1087			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   1088		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1089			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1090			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1091			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1092		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1093			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1094			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1095			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1096		tile[12] = (TILE_SPLIT(split_equal_to_row_size));
   1097		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1098			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1099			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   1100		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1101			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1102			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1103			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1104		tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   1105			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1106			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1107			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1108		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1109			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1110			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1111			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1112		tile[17] = (TILE_SPLIT(split_equal_to_row_size));
   1113		tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   1114			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1115			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1116			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1117		tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   1118			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1119			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   1120		tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   1121			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1122			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1123			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1124		tile[21] =  (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   1125			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1126			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1127			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1128		tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   1129			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1130			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1131			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1132		tile[23] = (TILE_SPLIT(split_equal_to_row_size));
   1133		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   1134			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1135			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1136			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1137		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   1138			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1139			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1140			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1141		tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   1142			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1143			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1144			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1145		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1146			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1147			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   1148		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1149			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1150			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1151			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1152		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1153			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1154			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1155			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1156		tile[30] = (TILE_SPLIT(split_equal_to_row_size));
   1157
   1158		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1159				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1160				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1161				NUM_BANKS(ADDR_SURF_16_BANK));
   1162		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1163				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1164				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1165				NUM_BANKS(ADDR_SURF_16_BANK));
   1166		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1167				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1168				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1169				NUM_BANKS(ADDR_SURF_16_BANK));
   1170		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1171				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1172				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1173				NUM_BANKS(ADDR_SURF_16_BANK));
   1174		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1175				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1176				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1177				NUM_BANKS(ADDR_SURF_16_BANK));
   1178		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1179				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1180				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1181				NUM_BANKS(ADDR_SURF_8_BANK));
   1182		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1183				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1184				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1185				NUM_BANKS(ADDR_SURF_4_BANK));
   1186		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   1187				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   1188				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1189				NUM_BANKS(ADDR_SURF_16_BANK));
   1190		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   1191				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1192				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1193				NUM_BANKS(ADDR_SURF_16_BANK));
   1194		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1195				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1196				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1197				NUM_BANKS(ADDR_SURF_16_BANK));
   1198		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1199				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1200				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1201				NUM_BANKS(ADDR_SURF_16_BANK));
   1202		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1203				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1204				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1205				NUM_BANKS(ADDR_SURF_16_BANK));
   1206		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1207				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1208				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1209				NUM_BANKS(ADDR_SURF_8_BANK));
   1210		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1211				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1212				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1213				NUM_BANKS(ADDR_SURF_4_BANK));
   1214
   1215		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   1216			WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
   1217		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   1218			if (reg_offset != 7)
   1219				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
   1220		break;
   1221	case CHIP_HAWAII:
   1222		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1223			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1224			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   1225			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1226		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1227			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1228			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   1229			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1230		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1231			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1232			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   1233			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1234		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1235			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1236			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   1237			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1238		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1239			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1240			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1241			   TILE_SPLIT(split_equal_to_row_size));
   1242		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1243			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1244			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1245			   TILE_SPLIT(split_equal_to_row_size));
   1246		tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1247			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1248			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1249			   TILE_SPLIT(split_equal_to_row_size));
   1250		tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1251			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1252			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1253			   TILE_SPLIT(split_equal_to_row_size));
   1254		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   1255			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
   1256		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1257			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1258			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   1259		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1260			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1261			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1262			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1263		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1264			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1265			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1266			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1267		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   1268			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1269			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1270			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1271		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1272			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1273			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   1274		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1275			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1276			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1277			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1278		tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   1279			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1280			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1281			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1282		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1283			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1284			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1285			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1286		tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1287			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1288			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1289			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1290		tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   1291			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1292			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1293			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1294		tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   1295			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1296			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
   1297		tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   1298			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1299			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1300			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1301		tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   1302			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1303			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1304			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1305		tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   1306			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1307			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1308			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1309		tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   1310			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1311			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1312			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1313		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   1314			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1315			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1316			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1317		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   1318			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1319			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1320			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1321		tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   1322			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1323			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1324			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1325		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1326			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1327			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   1328		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1329			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1330			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1331			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1332		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1333			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   1334			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1335			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1336		tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1337			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   1338			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1339			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1340
   1341		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1342				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1343				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1344				NUM_BANKS(ADDR_SURF_16_BANK));
   1345		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1346				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1347				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1348				NUM_BANKS(ADDR_SURF_16_BANK));
   1349		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1350				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1351				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1352				NUM_BANKS(ADDR_SURF_16_BANK));
   1353		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1354				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1355				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1356				NUM_BANKS(ADDR_SURF_16_BANK));
   1357		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1358				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1359				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1360				NUM_BANKS(ADDR_SURF_8_BANK));
   1361		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1362				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1363				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1364				NUM_BANKS(ADDR_SURF_4_BANK));
   1365		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1366				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1367				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1368				NUM_BANKS(ADDR_SURF_4_BANK));
   1369		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1370				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1371				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1372				NUM_BANKS(ADDR_SURF_16_BANK));
   1373		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1374				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1375				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1376				NUM_BANKS(ADDR_SURF_16_BANK));
   1377		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1378				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1379				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1380				NUM_BANKS(ADDR_SURF_16_BANK));
   1381		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1382				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1383				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1384				NUM_BANKS(ADDR_SURF_8_BANK));
   1385		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1386				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1387				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1388				NUM_BANKS(ADDR_SURF_16_BANK));
   1389		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1390				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1391				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1392				NUM_BANKS(ADDR_SURF_8_BANK));
   1393		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1394				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1395				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   1396				NUM_BANKS(ADDR_SURF_4_BANK));
   1397
   1398		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   1399			WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
   1400		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   1401			if (reg_offset != 7)
   1402				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
   1403		break;
   1404	case CHIP_KABINI:
   1405	case CHIP_KAVERI:
   1406	case CHIP_MULLINS:
   1407	default:
   1408		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1409			   PIPE_CONFIG(ADDR_SURF_P2) |
   1410			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
   1411			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1412		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1413			   PIPE_CONFIG(ADDR_SURF_P2) |
   1414			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
   1415			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1416		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1417			   PIPE_CONFIG(ADDR_SURF_P2) |
   1418			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
   1419			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1420		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1421			   PIPE_CONFIG(ADDR_SURF_P2) |
   1422			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
   1423			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1424		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1425			   PIPE_CONFIG(ADDR_SURF_P2) |
   1426			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1427			   TILE_SPLIT(split_equal_to_row_size));
   1428		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1429			   PIPE_CONFIG(ADDR_SURF_P2) |
   1430			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   1431		tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1432			   PIPE_CONFIG(ADDR_SURF_P2) |
   1433			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   1434			   TILE_SPLIT(split_equal_to_row_size));
   1435		tile[7] = (TILE_SPLIT(split_equal_to_row_size));
   1436		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   1437			   PIPE_CONFIG(ADDR_SURF_P2));
   1438		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1439			   PIPE_CONFIG(ADDR_SURF_P2) |
   1440			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   1441		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1442			    PIPE_CONFIG(ADDR_SURF_P2) |
   1443			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1444			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1445		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1446			    PIPE_CONFIG(ADDR_SURF_P2) |
   1447			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   1448			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1449		tile[12] = (TILE_SPLIT(split_equal_to_row_size));
   1450		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1451			    PIPE_CONFIG(ADDR_SURF_P2) |
   1452			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   1453		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1454			    PIPE_CONFIG(ADDR_SURF_P2) |
   1455			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1456			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1457		tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
   1458			    PIPE_CONFIG(ADDR_SURF_P2) |
   1459			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1460			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1461		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1462			    PIPE_CONFIG(ADDR_SURF_P2) |
   1463			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1464			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1465		tile[17] = (TILE_SPLIT(split_equal_to_row_size));
   1466		tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   1467			    PIPE_CONFIG(ADDR_SURF_P2) |
   1468			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1469			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1470		tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
   1471			    PIPE_CONFIG(ADDR_SURF_P2) |
   1472			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
   1473		tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   1474			    PIPE_CONFIG(ADDR_SURF_P2) |
   1475			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1476			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1477		tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
   1478			    PIPE_CONFIG(ADDR_SURF_P2) |
   1479			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1480			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1481		tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
   1482			    PIPE_CONFIG(ADDR_SURF_P2) |
   1483			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1484			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1485		tile[23] = (TILE_SPLIT(split_equal_to_row_size));
   1486		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
   1487			    PIPE_CONFIG(ADDR_SURF_P2) |
   1488			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   1489			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1490		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
   1491			    PIPE_CONFIG(ADDR_SURF_P2) |
   1492			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1493			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1494		tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
   1495			    PIPE_CONFIG(ADDR_SURF_P2) |
   1496			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
   1497			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
   1498		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   1499			    PIPE_CONFIG(ADDR_SURF_P2) |
   1500			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   1501		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   1502			    PIPE_CONFIG(ADDR_SURF_P2) |
   1503			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1504			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   1505		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   1506			    PIPE_CONFIG(ADDR_SURF_P2) |
   1507			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   1508			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
   1509		tile[30] = (TILE_SPLIT(split_equal_to_row_size));
   1510
   1511		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1512				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1513				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1514				NUM_BANKS(ADDR_SURF_8_BANK));
   1515		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1516				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1517				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1518				NUM_BANKS(ADDR_SURF_8_BANK));
   1519		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1520				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1521				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1522				NUM_BANKS(ADDR_SURF_8_BANK));
   1523		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1524				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1525				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1526				NUM_BANKS(ADDR_SURF_8_BANK));
   1527		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1528				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1529				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1530				NUM_BANKS(ADDR_SURF_8_BANK));
   1531		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1532				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1533				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1534				NUM_BANKS(ADDR_SURF_8_BANK));
   1535		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1536				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1537				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1538				NUM_BANKS(ADDR_SURF_8_BANK));
   1539		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   1540				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   1541				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1542				NUM_BANKS(ADDR_SURF_16_BANK));
   1543		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   1544				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1545				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1546				NUM_BANKS(ADDR_SURF_16_BANK));
   1547		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   1548				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   1549				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1550				NUM_BANKS(ADDR_SURF_16_BANK));
   1551		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   1552				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1553				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1554				NUM_BANKS(ADDR_SURF_16_BANK));
   1555		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1556				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   1557				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1558				NUM_BANKS(ADDR_SURF_16_BANK));
   1559		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1560				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1561				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   1562				NUM_BANKS(ADDR_SURF_16_BANK));
   1563		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   1564				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   1565				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   1566				NUM_BANKS(ADDR_SURF_8_BANK));
   1567
   1568		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   1569			WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
   1570		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   1571			if (reg_offset != 7)
   1572				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
   1573		break;
   1574	}
   1575}
   1576
   1577/**
   1578 * gfx_v7_0_select_se_sh - select which SE, SH to address
   1579 *
   1580 * @adev: amdgpu_device pointer
   1581 * @se_num: shader engine to address
   1582 * @sh_num: sh block to address
   1583 * @instance: Certain registers are instanced per SE or SH.
   1584 *            0xffffffff means broadcast to all SEs or SHs (CIK).
   1585 *
   1586 * Select which SE, SH combinations to address.
   1587 */
   1588static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
   1589				  u32 se_num, u32 sh_num, u32 instance)
   1590{
   1591	u32 data;
   1592
   1593	if (instance == 0xffffffff)
   1594		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
   1595	else
   1596		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
   1597
   1598	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
   1599		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
   1600			GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
   1601	else if (se_num == 0xffffffff)
   1602		data |= GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
   1603			(sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT);
   1604	else if (sh_num == 0xffffffff)
   1605		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
   1606			(se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
   1607	else
   1608		data |= (sh_num << GRBM_GFX_INDEX__SH_INDEX__SHIFT) |
   1609			(se_num << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
   1610	WREG32(mmGRBM_GFX_INDEX, data);
   1611}
   1612
   1613/**
   1614 * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
   1615 *
   1616 * @adev: amdgpu_device pointer
   1617 *
   1618 * Calculates the bitmask of enabled RBs (CIK).
   1619 * Returns the enabled RB bitmask.
   1620 */
   1621static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
   1622{
   1623	u32 data, mask;
   1624
   1625	data = RREG32(mmCC_RB_BACKEND_DISABLE);
   1626	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
   1627
   1628	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
   1629	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
   1630
   1631	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
   1632					 adev->gfx.config.max_sh_per_se);
   1633
   1634	return (~data) & mask;
   1635}
   1636
   1637static void
   1638gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
   1639{
   1640	switch (adev->asic_type) {
   1641	case CHIP_BONAIRE:
   1642		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
   1643			  SE_XSEL(1) | SE_YSEL(1);
   1644		*rconf1 |= 0x0;
   1645		break;
   1646	case CHIP_HAWAII:
   1647		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
   1648			  RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) |
   1649			  PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) |
   1650			  SE_YSEL(3);
   1651		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
   1652			   SE_PAIR_YSEL(2);
   1653		break;
   1654	case CHIP_KAVERI:
   1655		*rconf |= RB_MAP_PKR0(2);
   1656		*rconf1 |= 0x0;
   1657		break;
   1658	case CHIP_KABINI:
   1659	case CHIP_MULLINS:
   1660		*rconf |= 0x0;
   1661		*rconf1 |= 0x0;
   1662		break;
   1663	default:
   1664		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
   1665		break;
   1666	}
   1667}
   1668
   1669static void
   1670gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
   1671					u32 raster_config, u32 raster_config_1,
   1672					unsigned rb_mask, unsigned num_rb)
   1673{
   1674	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
   1675	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
   1676	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
   1677	unsigned rb_per_se = num_rb / num_se;
   1678	unsigned se_mask[4];
   1679	unsigned se;
   1680
   1681	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
   1682	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
   1683	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
   1684	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
   1685
   1686	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
   1687	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
   1688	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
   1689
   1690	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
   1691			     (!se_mask[2] && !se_mask[3]))) {
   1692		raster_config_1 &= ~SE_PAIR_MAP_MASK;
   1693
   1694		if (!se_mask[0] && !se_mask[1]) {
   1695			raster_config_1 |=
   1696				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
   1697		} else {
   1698			raster_config_1 |=
   1699				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
   1700		}
   1701	}
   1702
   1703	for (se = 0; se < num_se; se++) {
   1704		unsigned raster_config_se = raster_config;
   1705		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
   1706		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
   1707		int idx = (se / 2) * 2;
   1708
   1709		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
   1710			raster_config_se &= ~SE_MAP_MASK;
   1711
   1712			if (!se_mask[idx]) {
   1713				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
   1714			} else {
   1715				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
   1716			}
   1717		}
   1718
   1719		pkr0_mask &= rb_mask;
   1720		pkr1_mask &= rb_mask;
   1721		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
   1722			raster_config_se &= ~PKR_MAP_MASK;
   1723
   1724			if (!pkr0_mask) {
   1725				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
   1726			} else {
   1727				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
   1728			}
   1729		}
   1730
   1731		if (rb_per_se >= 2) {
   1732			unsigned rb0_mask = 1 << (se * rb_per_se);
   1733			unsigned rb1_mask = rb0_mask << 1;
   1734
   1735			rb0_mask &= rb_mask;
   1736			rb1_mask &= rb_mask;
   1737			if (!rb0_mask || !rb1_mask) {
   1738				raster_config_se &= ~RB_MAP_PKR0_MASK;
   1739
   1740				if (!rb0_mask) {
   1741					raster_config_se |=
   1742						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
   1743				} else {
   1744					raster_config_se |=
   1745						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
   1746				}
   1747			}
   1748
   1749			if (rb_per_se > 2) {
   1750				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
   1751				rb1_mask = rb0_mask << 1;
   1752				rb0_mask &= rb_mask;
   1753				rb1_mask &= rb_mask;
   1754				if (!rb0_mask || !rb1_mask) {
   1755					raster_config_se &= ~RB_MAP_PKR1_MASK;
   1756
   1757					if (!rb0_mask) {
   1758						raster_config_se |=
   1759							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
   1760					} else {
   1761						raster_config_se |=
   1762							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
   1763					}
   1764				}
   1765			}
   1766		}
   1767
   1768		/* GRBM_GFX_INDEX has a different offset on CI+ */
   1769		gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
   1770		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
   1771		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
   1772	}
   1773
   1774	/* GRBM_GFX_INDEX has a different offset on CI+ */
   1775	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1776}
   1777
   1778/**
   1779 * gfx_v7_0_setup_rb - setup the RBs on the asic
   1780 *
   1781 * @adev: amdgpu_device pointer
   1782 *
   1783 * Configures per-SE/SH RB registers (CIK).
   1784 */
   1785static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
   1786{
   1787	int i, j;
   1788	u32 data;
   1789	u32 raster_config = 0, raster_config_1 = 0;
   1790	u32 active_rbs = 0;
   1791	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
   1792					adev->gfx.config.max_sh_per_se;
   1793	unsigned num_rb_pipes;
   1794
   1795	mutex_lock(&adev->grbm_idx_mutex);
   1796	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   1797		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   1798			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
   1799			data = gfx_v7_0_get_rb_active_bitmap(adev);
   1800			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
   1801					       rb_bitmap_width_per_sh);
   1802		}
   1803	}
   1804	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1805
   1806	adev->gfx.config.backend_enable_mask = active_rbs;
   1807	adev->gfx.config.num_rbs = hweight32(active_rbs);
   1808
   1809	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
   1810			     adev->gfx.config.max_shader_engines, 16);
   1811
   1812	gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
   1813
   1814	if (!adev->gfx.config.backend_enable_mask ||
   1815			adev->gfx.config.num_rbs >= num_rb_pipes) {
   1816		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
   1817		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
   1818	} else {
   1819		gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
   1820							adev->gfx.config.backend_enable_mask,
   1821							num_rb_pipes);
   1822	}
   1823
   1824	/* cache the values for userspace */
   1825	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   1826		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   1827			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
   1828			adev->gfx.config.rb_config[i][j].rb_backend_disable =
   1829				RREG32(mmCC_RB_BACKEND_DISABLE);
   1830			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
   1831				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
   1832			adev->gfx.config.rb_config[i][j].raster_config =
   1833				RREG32(mmPA_SC_RASTER_CONFIG);
   1834			adev->gfx.config.rb_config[i][j].raster_config_1 =
   1835				RREG32(mmPA_SC_RASTER_CONFIG_1);
   1836		}
   1837	}
   1838	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1839	mutex_unlock(&adev->grbm_idx_mutex);
   1840}
   1841
   1842#define DEFAULT_SH_MEM_BASES	(0x6000)
   1843/**
   1844 * gfx_v7_0_init_compute_vmid - gart enable
   1845 *
   1846 * @adev: amdgpu_device pointer
   1847 *
   1848 * Initialize compute vmid sh_mem registers
   1849 *
   1850 */
   1851static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
   1852{
   1853	int i;
   1854	uint32_t sh_mem_config;
   1855	uint32_t sh_mem_bases;
   1856
   1857	/*
   1858	 * Configure apertures:
   1859	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
   1860	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
   1861	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
   1862	*/
   1863	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
   1864	sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
   1865			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
   1866	sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
   1867	mutex_lock(&adev->srbm_mutex);
   1868	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   1869		cik_srbm_select(adev, 0, 0, 0, i);
   1870		/* CP and shaders */
   1871		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
   1872		WREG32(mmSH_MEM_APE1_BASE, 1);
   1873		WREG32(mmSH_MEM_APE1_LIMIT, 0);
   1874		WREG32(mmSH_MEM_BASES, sh_mem_bases);
   1875	}
   1876	cik_srbm_select(adev, 0, 0, 0, 0);
   1877	mutex_unlock(&adev->srbm_mutex);
   1878
   1879	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
   1880	   access. These should be enabled by FW for target VMIDs. */
   1881	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
   1882		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
   1883		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
   1884		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
   1885		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
   1886	}
   1887}
   1888
   1889static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
   1890{
   1891	int vmid;
   1892
   1893	/*
   1894	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
   1895	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
   1896	 * the driver can enable them for graphics. VMID0 should maintain
   1897	 * access so that HWS firmware can save/restore entries.
   1898	 */
   1899	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
   1900		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
   1901		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
   1902		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
   1903		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
   1904	}
   1905}
   1906
   1907static void gfx_v7_0_config_init(struct amdgpu_device *adev)
   1908{
   1909	adev->gfx.config.double_offchip_lds_buf = 1;
   1910}
   1911
   1912/**
   1913 * gfx_v7_0_constants_init - setup the 3D engine
   1914 *
   1915 * @adev: amdgpu_device pointer
   1916 *
   1917 * init the gfx constants such as the 3D engine, tiling configuration
   1918 * registers, maximum number of quad pipes, render backends...
   1919 */
   1920static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
   1921{
   1922	u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
   1923	u32 tmp;
   1924	int i;
   1925
   1926	WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
   1927
   1928	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
   1929	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
   1930	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
   1931
   1932	gfx_v7_0_tiling_mode_table_init(adev);
   1933
   1934	gfx_v7_0_setup_rb(adev);
   1935	gfx_v7_0_get_cu_info(adev);
   1936	gfx_v7_0_config_init(adev);
   1937
   1938	/* set HW defaults for 3D engine */
   1939	WREG32(mmCP_MEQ_THRESHOLDS,
   1940	       (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
   1941	       (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
   1942
   1943	mutex_lock(&adev->grbm_idx_mutex);
   1944	/*
   1945	 * making sure that the following register writes will be broadcasted
   1946	 * to all the shaders
   1947	 */
   1948	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   1949
   1950	/* XXX SH_MEM regs */
   1951	/* where to put LDS, scratch, GPUVM in FSA64 space */
   1952	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
   1953				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
   1954	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
   1955				   MTYPE_NC);
   1956	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
   1957				   MTYPE_UC);
   1958	sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
   1959
   1960	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
   1961				   SWIZZLE_ENABLE, 1);
   1962	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
   1963				   ELEMENT_SIZE, 1);
   1964	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
   1965				   INDEX_STRIDE, 3);
   1966	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
   1967
   1968	mutex_lock(&adev->srbm_mutex);
   1969	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
   1970		if (i == 0)
   1971			sh_mem_base = 0;
   1972		else
   1973			sh_mem_base = adev->gmc.shared_aperture_start >> 48;
   1974		cik_srbm_select(adev, 0, 0, 0, i);
   1975		/* CP and shaders */
   1976		WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
   1977		WREG32(mmSH_MEM_APE1_BASE, 1);
   1978		WREG32(mmSH_MEM_APE1_LIMIT, 0);
   1979		WREG32(mmSH_MEM_BASES, sh_mem_base);
   1980	}
   1981	cik_srbm_select(adev, 0, 0, 0, 0);
   1982	mutex_unlock(&adev->srbm_mutex);
   1983
   1984	gfx_v7_0_init_compute_vmid(adev);
   1985	gfx_v7_0_init_gds_vmid(adev);
   1986
   1987	WREG32(mmSX_DEBUG_1, 0x20);
   1988
   1989	WREG32(mmTA_CNTL_AUX, 0x00010000);
   1990
   1991	tmp = RREG32(mmSPI_CONFIG_CNTL);
   1992	tmp |= 0x03000000;
   1993	WREG32(mmSPI_CONFIG_CNTL, tmp);
   1994
   1995	WREG32(mmSQ_CONFIG, 1);
   1996
   1997	WREG32(mmDB_DEBUG, 0);
   1998
   1999	tmp = RREG32(mmDB_DEBUG2) & ~0xf00fffff;
   2000	tmp |= 0x00000400;
   2001	WREG32(mmDB_DEBUG2, tmp);
   2002
   2003	tmp = RREG32(mmDB_DEBUG3) & ~0x0002021c;
   2004	tmp |= 0x00020200;
   2005	WREG32(mmDB_DEBUG3, tmp);
   2006
   2007	tmp = RREG32(mmCB_HW_CONTROL) & ~0x00010000;
   2008	tmp |= 0x00018208;
   2009	WREG32(mmCB_HW_CONTROL, tmp);
   2010
   2011	WREG32(mmSPI_CONFIG_CNTL_1, (4 << SPI_CONFIG_CNTL_1__VTX_DONE_DELAY__SHIFT));
   2012
   2013	WREG32(mmPA_SC_FIFO_SIZE,
   2014		((adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
   2015		(adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
   2016		(adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
   2017		(adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)));
   2018
   2019	WREG32(mmVGT_NUM_INSTANCES, 1);
   2020
   2021	WREG32(mmCP_PERFMON_CNTL, 0);
   2022
   2023	WREG32(mmSQ_CONFIG, 0);
   2024
   2025	WREG32(mmPA_SC_FORCE_EOV_MAX_CNTS,
   2026		((4095 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT__SHIFT) |
   2027		(255 << PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT__SHIFT)));
   2028
   2029	WREG32(mmVGT_CACHE_INVALIDATION,
   2030		(VC_AND_TC << VGT_CACHE_INVALIDATION__CACHE_INVALIDATION__SHIFT) |
   2031		(ES_AND_GS_AUTO << VGT_CACHE_INVALIDATION__AUTO_INVLD_EN__SHIFT));
   2032
   2033	WREG32(mmVGT_GS_VERTEX_REUSE, 16);
   2034	WREG32(mmPA_SC_LINE_STIPPLE_STATE, 0);
   2035
   2036	WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
   2037			(3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
   2038	WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
   2039
   2040	tmp = RREG32(mmSPI_ARB_PRIORITY);
   2041	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
   2042	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
   2043	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
   2044	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
   2045	WREG32(mmSPI_ARB_PRIORITY, tmp);
   2046
   2047	mutex_unlock(&adev->grbm_idx_mutex);
   2048
   2049	udelay(50);
   2050}
   2051
   2052/**
   2053 * gfx_v7_0_ring_test_ring - basic gfx ring test
   2054 *
   2055 * @ring: amdgpu_ring structure holding ring information
   2056 *
   2057 * Allocate a scratch register and write to it using the gfx ring (CIK).
   2058 * Provides a basic gfx ring test to verify that the ring is working.
   2059 * Used by gfx_v7_0_cp_gfx_resume();
   2060 * Returns 0 on success, error on failure.
   2061 */
   2062static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
   2063{
   2064	struct amdgpu_device *adev = ring->adev;
   2065	uint32_t tmp = 0;
   2066	unsigned i;
   2067	int r;
   2068
   2069	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
   2070	r = amdgpu_ring_alloc(ring, 3);
   2071	if (r)
   2072		return r;
   2073
   2074	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
   2075	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
   2076	amdgpu_ring_write(ring, 0xDEADBEEF);
   2077	amdgpu_ring_commit(ring);
   2078
   2079	for (i = 0; i < adev->usec_timeout; i++) {
   2080		tmp = RREG32(mmSCRATCH_REG0);
   2081		if (tmp == 0xDEADBEEF)
   2082			break;
   2083		udelay(1);
   2084	}
   2085	if (i >= adev->usec_timeout)
   2086		r = -ETIMEDOUT;
   2087	return r;
   2088}
   2089
   2090/**
   2091 * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp
   2092 *
   2093 * @ring: amdgpu_ring structure holding ring information
   2094 *
   2095 * Emits an hdp flush on the cp.
   2096 */
   2097static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
   2098{
   2099	u32 ref_and_mask;
   2100	int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
   2101
   2102	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
   2103		switch (ring->me) {
   2104		case 1:
   2105			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
   2106			break;
   2107		case 2:
   2108			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
   2109			break;
   2110		default:
   2111			return;
   2112		}
   2113	} else {
   2114		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
   2115	}
   2116
   2117	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   2118	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
   2119				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
   2120				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
   2121	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
   2122	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
   2123	amdgpu_ring_write(ring, ref_and_mask);
   2124	amdgpu_ring_write(ring, ref_and_mask);
   2125	amdgpu_ring_write(ring, 0x20); /* poll interval */
   2126}
   2127
   2128static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
   2129{
   2130	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
   2131	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
   2132		EVENT_INDEX(4));
   2133
   2134	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
   2135	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
   2136		EVENT_INDEX(0));
   2137}
   2138
   2139/**
   2140 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
   2141 *
   2142 * @ring: amdgpu_ring structure holding ring information
   2143 * @addr: address
   2144 * @seq: sequence number
   2145 * @flags: fence related flags
   2146 *
   2147 * Emits a fence sequence number on the gfx ring and flushes
   2148 * GPU caches.
   2149 */
   2150static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
   2151					 u64 seq, unsigned flags)
   2152{
   2153	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
   2154	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   2155	/* Workaround for cache flush problems. First send a dummy EOP
   2156	 * event down the pipe with seq one below.
   2157	 */
   2158	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
   2159	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   2160				 EOP_TC_ACTION_EN |
   2161				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   2162				 EVENT_INDEX(5)));
   2163	amdgpu_ring_write(ring, addr & 0xfffffffc);
   2164	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
   2165				DATA_SEL(1) | INT_SEL(0));
   2166	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
   2167	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
   2168
   2169	/* Then send the real EOP event down the pipe. */
   2170	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
   2171	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   2172				 EOP_TC_ACTION_EN |
   2173				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   2174				 EVENT_INDEX(5)));
   2175	amdgpu_ring_write(ring, addr & 0xfffffffc);
   2176	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
   2177				DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
   2178	amdgpu_ring_write(ring, lower_32_bits(seq));
   2179	amdgpu_ring_write(ring, upper_32_bits(seq));
   2180}
   2181
   2182/**
   2183 * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring
   2184 *
   2185 * @ring: amdgpu_ring structure holding ring information
   2186 * @addr: address
   2187 * @seq: sequence number
   2188 * @flags: fence related flags
   2189 *
   2190 * Emits a fence sequence number on the compute ring and flushes
   2191 * GPU caches.
   2192 */
   2193static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
   2194					     u64 addr, u64 seq,
   2195					     unsigned flags)
   2196{
   2197	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
   2198	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
   2199
   2200	/* RELEASE_MEM - flush caches, send int */
   2201	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
   2202	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
   2203				 EOP_TC_ACTION_EN |
   2204				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   2205				 EVENT_INDEX(5)));
   2206	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
   2207	amdgpu_ring_write(ring, addr & 0xfffffffc);
   2208	amdgpu_ring_write(ring, upper_32_bits(addr));
   2209	amdgpu_ring_write(ring, lower_32_bits(seq));
   2210	amdgpu_ring_write(ring, upper_32_bits(seq));
   2211}
   2212
   2213/*
   2214 * IB stuff
   2215 */
   2216/**
   2217 * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the ring
   2218 *
   2219 * @ring: amdgpu_ring structure holding ring information
   2220 * @job: job to retrieve vmid from
   2221 * @ib: amdgpu indirect buffer object
   2222 * @flags: options (AMDGPU_HAVE_CTX_SWITCH)
   2223 *
   2224 * Emits an DE (drawing engine) or CE (constant engine) IB
   2225 * on the gfx ring.  IBs are usually generated by userspace
   2226 * acceleration drivers and submitted to the kernel for
   2227 * scheduling on the ring.  This function schedules the IB
   2228 * on the gfx ring for execution by the GPU.
   2229 */
   2230static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
   2231					struct amdgpu_job *job,
   2232					struct amdgpu_ib *ib,
   2233					uint32_t flags)
   2234{
   2235	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   2236	u32 header, control = 0;
   2237
   2238	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
   2239	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
   2240		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   2241		amdgpu_ring_write(ring, 0);
   2242	}
   2243
   2244	if (ib->flags & AMDGPU_IB_FLAG_CE)
   2245		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
   2246	else
   2247		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
   2248
   2249	control |= ib->length_dw | (vmid << 24);
   2250
   2251	amdgpu_ring_write(ring, header);
   2252	amdgpu_ring_write(ring,
   2253#ifdef __BIG_ENDIAN
   2254			  (2 << 0) |
   2255#endif
   2256			  (ib->gpu_addr & 0xFFFFFFFC));
   2257	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
   2258	amdgpu_ring_write(ring, control);
   2259}
   2260
   2261static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
   2262					  struct amdgpu_job *job,
   2263					  struct amdgpu_ib *ib,
   2264					  uint32_t flags)
   2265{
   2266	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
   2267	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
   2268
   2269	/* Currently, there is a high possibility to get wave ID mismatch
   2270	 * between ME and GDS, leading to a hw deadlock, because ME generates
   2271	 * different wave IDs than the GDS expects. This situation happens
   2272	 * randomly when at least 5 compute pipes use GDS ordered append.
   2273	 * The wave IDs generated by ME are also wrong after suspend/resume.
   2274	 * Those are probably bugs somewhere else in the kernel driver.
   2275	 *
   2276	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
   2277	 * GDS to 0 for this ring (me/pipe).
   2278	 */
   2279	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
   2280		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
   2281		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
   2282		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
   2283	}
   2284
   2285	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
   2286	amdgpu_ring_write(ring,
   2287#ifdef __BIG_ENDIAN
   2288					  (2 << 0) |
   2289#endif
   2290					  (ib->gpu_addr & 0xFFFFFFFC));
   2291	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
   2292	amdgpu_ring_write(ring, control);
   2293}
   2294
   2295static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
   2296{
   2297	uint32_t dw2 = 0;
   2298
   2299	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
   2300	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
   2301		gfx_v7_0_ring_emit_vgt_flush(ring);
   2302		/* set load_global_config & load_global_uconfig */
   2303		dw2 |= 0x8001;
   2304		/* set load_cs_sh_regs */
   2305		dw2 |= 0x01000000;
   2306		/* set load_per_context_state & load_gfx_sh_regs */
   2307		dw2 |= 0x10002;
   2308	}
   2309
   2310	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   2311	amdgpu_ring_write(ring, dw2);
   2312	amdgpu_ring_write(ring, 0);
   2313}
   2314
   2315/**
   2316 * gfx_v7_0_ring_test_ib - basic ring IB test
   2317 *
   2318 * @ring: amdgpu_ring structure holding ring information
   2319 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
   2320 *
   2321 * Allocate an IB and execute it on the gfx ring (CIK).
   2322 * Provides a basic gfx ring test to verify that IBs are working.
   2323 * Returns 0 on success, error on failure.
   2324 */
   2325static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
   2326{
   2327	struct amdgpu_device *adev = ring->adev;
   2328	struct amdgpu_ib ib;
   2329	struct dma_fence *f = NULL;
   2330	uint32_t tmp = 0;
   2331	long r;
   2332
   2333	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
   2334	memset(&ib, 0, sizeof(ib));
   2335	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
   2336	if (r)
   2337		return r;
   2338
   2339	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
   2340	ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START;
   2341	ib.ptr[2] = 0xDEADBEEF;
   2342	ib.length_dw = 3;
   2343
   2344	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
   2345	if (r)
   2346		goto error;
   2347
   2348	r = dma_fence_wait_timeout(f, false, timeout);
   2349	if (r == 0) {
   2350		r = -ETIMEDOUT;
   2351		goto error;
   2352	} else if (r < 0) {
   2353		goto error;
   2354	}
   2355	tmp = RREG32(mmSCRATCH_REG0);
   2356	if (tmp == 0xDEADBEEF)
   2357		r = 0;
   2358	else
   2359		r = -EINVAL;
   2360
   2361error:
   2362	amdgpu_ib_free(adev, &ib, NULL);
   2363	dma_fence_put(f);
   2364	return r;
   2365}
   2366
   2367/*
   2368 * CP.
   2369 * On CIK, gfx and compute now have independent command processors.
   2370 *
   2371 * GFX
   2372 * Gfx consists of a single ring and can process both gfx jobs and
   2373 * compute jobs.  The gfx CP consists of three microengines (ME):
   2374 * PFP - Pre-Fetch Parser
   2375 * ME - Micro Engine
   2376 * CE - Constant Engine
   2377 * The PFP and ME make up what is considered the Drawing Engine (DE).
   2378 * The CE is an asynchronous engine used for updating buffer descriptors
   2379 * used by the DE so that they can be loaded into cache in parallel
   2380 * while the DE is processing state update packets.
   2381 *
   2382 * Compute
   2383 * The compute CP consists of two microengines (ME):
   2384 * MEC1 - Compute MicroEngine 1
   2385 * MEC2 - Compute MicroEngine 2
   2386 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
   2387 * The queues are exposed to userspace and are programmed directly
   2388 * by the compute runtime.
   2389 */
   2390/**
   2391 * gfx_v7_0_cp_gfx_enable - enable/disable the gfx CP MEs
   2392 *
   2393 * @adev: amdgpu_device pointer
   2394 * @enable: enable or disable the MEs
   2395 *
   2396 * Halts or unhalts the gfx MEs.
   2397 */
   2398static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
   2399{
   2400	if (enable)
   2401		WREG32(mmCP_ME_CNTL, 0);
   2402	else
   2403		WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
   2404				      CP_ME_CNTL__PFP_HALT_MASK |
   2405				      CP_ME_CNTL__CE_HALT_MASK));
   2406	udelay(50);
   2407}
   2408
   2409/**
   2410 * gfx_v7_0_cp_gfx_load_microcode - load the gfx CP ME ucode
   2411 *
   2412 * @adev: amdgpu_device pointer
   2413 *
   2414 * Loads the gfx PFP, ME, and CE ucode.
   2415 * Returns 0 for success, -EINVAL if the ucode is not available.
   2416 */
   2417static int gfx_v7_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
   2418{
   2419	const struct gfx_firmware_header_v1_0 *pfp_hdr;
   2420	const struct gfx_firmware_header_v1_0 *ce_hdr;
   2421	const struct gfx_firmware_header_v1_0 *me_hdr;
   2422	const __le32 *fw_data;
   2423	unsigned i, fw_size;
   2424
   2425	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
   2426		return -EINVAL;
   2427
   2428	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
   2429	ce_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
   2430	me_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
   2431
   2432	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
   2433	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
   2434	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
   2435	adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
   2436	adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
   2437	adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
   2438	adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version);
   2439	adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version);
   2440	adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version);
   2441
   2442	gfx_v7_0_cp_gfx_enable(adev, false);
   2443
   2444	/* PFP */
   2445	fw_data = (const __le32 *)
   2446		(adev->gfx.pfp_fw->data +
   2447		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
   2448	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
   2449	WREG32(mmCP_PFP_UCODE_ADDR, 0);
   2450	for (i = 0; i < fw_size; i++)
   2451		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
   2452	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
   2453
   2454	/* CE */
   2455	fw_data = (const __le32 *)
   2456		(adev->gfx.ce_fw->data +
   2457		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
   2458	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
   2459	WREG32(mmCP_CE_UCODE_ADDR, 0);
   2460	for (i = 0; i < fw_size; i++)
   2461		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
   2462	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
   2463
   2464	/* ME */
   2465	fw_data = (const __le32 *)
   2466		(adev->gfx.me_fw->data +
   2467		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
   2468	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
   2469	WREG32(mmCP_ME_RAM_WADDR, 0);
   2470	for (i = 0; i < fw_size; i++)
   2471		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
   2472	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
   2473
   2474	return 0;
   2475}
   2476
   2477/**
   2478 * gfx_v7_0_cp_gfx_start - start the gfx ring
   2479 *
   2480 * @adev: amdgpu_device pointer
   2481 *
   2482 * Enables the ring and loads the clear state context and other
   2483 * packets required to init the ring.
   2484 * Returns 0 for success, error for failure.
   2485 */
   2486static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
   2487{
   2488	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
   2489	const struct cs_section_def *sect = NULL;
   2490	const struct cs_extent_def *ext = NULL;
   2491	int r, i;
   2492
   2493	/* init the CP */
   2494	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
   2495	WREG32(mmCP_ENDIAN_SWAP, 0);
   2496	WREG32(mmCP_DEVICE_ID, 1);
   2497
   2498	gfx_v7_0_cp_gfx_enable(adev, true);
   2499
   2500	r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
   2501	if (r) {
   2502		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
   2503		return r;
   2504	}
   2505
   2506	/* init the CE partitions.  CE only used for gfx on CIK */
   2507	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
   2508	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
   2509	amdgpu_ring_write(ring, 0x8000);
   2510	amdgpu_ring_write(ring, 0x8000);
   2511
   2512	/* clear state buffer */
   2513	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   2514	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   2515
   2516	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   2517	amdgpu_ring_write(ring, 0x80000000);
   2518	amdgpu_ring_write(ring, 0x80000000);
   2519
   2520	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
   2521		for (ext = sect->section; ext->extent != NULL; ++ext) {
   2522			if (sect->id == SECT_CONTEXT) {
   2523				amdgpu_ring_write(ring,
   2524						  PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
   2525				amdgpu_ring_write(ring, ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
   2526				for (i = 0; i < ext->reg_count; i++)
   2527					amdgpu_ring_write(ring, ext->extent[i]);
   2528			}
   2529		}
   2530	}
   2531
   2532	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   2533	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
   2534	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
   2535	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
   2536
   2537	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   2538	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
   2539
   2540	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
   2541	amdgpu_ring_write(ring, 0);
   2542
   2543	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   2544	amdgpu_ring_write(ring, 0x00000316);
   2545	amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
   2546	amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
   2547
   2548	amdgpu_ring_commit(ring);
   2549
   2550	return 0;
   2551}
   2552
   2553/**
   2554 * gfx_v7_0_cp_gfx_resume - setup the gfx ring buffer registers
   2555 *
   2556 * @adev: amdgpu_device pointer
   2557 *
   2558 * Program the location and size of the gfx ring buffer
   2559 * and test it to make sure it's working.
   2560 * Returns 0 for success, error for failure.
   2561 */
   2562static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
   2563{
   2564	struct amdgpu_ring *ring;
   2565	u32 tmp;
   2566	u32 rb_bufsz;
   2567	u64 rb_addr, rptr_addr;
   2568	int r;
   2569
   2570	WREG32(mmCP_SEM_WAIT_TIMER, 0x0);
   2571	if (adev->asic_type != CHIP_HAWAII)
   2572		WREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
   2573
   2574	/* Set the write pointer delay */
   2575	WREG32(mmCP_RB_WPTR_DELAY, 0);
   2576
   2577	/* set the RB to use vmid 0 */
   2578	WREG32(mmCP_RB_VMID, 0);
   2579
   2580	WREG32(mmSCRATCH_ADDR, 0);
   2581
   2582	/* ring 0 - compute and gfx */
   2583	/* Set ring buffer size */
   2584	ring = &adev->gfx.gfx_ring[0];
   2585	rb_bufsz = order_base_2(ring->ring_size / 8);
   2586	tmp = (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
   2587#ifdef __BIG_ENDIAN
   2588	tmp |= 2 << CP_RB0_CNTL__BUF_SWAP__SHIFT;
   2589#endif
   2590	WREG32(mmCP_RB0_CNTL, tmp);
   2591
   2592	/* Initialize the ring buffer's read and write pointers */
   2593	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
   2594	ring->wptr = 0;
   2595	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
   2596
   2597	/* set the wb address wether it's enabled or not */
   2598	rptr_addr = ring->rptr_gpu_addr;
   2599	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
   2600	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
   2601
   2602	/* scratch register shadowing is no longer supported */
   2603	WREG32(mmSCRATCH_UMSK, 0);
   2604
   2605	mdelay(1);
   2606	WREG32(mmCP_RB0_CNTL, tmp);
   2607
   2608	rb_addr = ring->gpu_addr >> 8;
   2609	WREG32(mmCP_RB0_BASE, rb_addr);
   2610	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
   2611
   2612	/* start the ring */
   2613	gfx_v7_0_cp_gfx_start(adev);
   2614	r = amdgpu_ring_test_helper(ring);
   2615	if (r)
   2616		return r;
   2617
   2618	return 0;
   2619}
   2620
   2621static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
   2622{
   2623	return *ring->rptr_cpu_addr;
   2624}
   2625
   2626static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
   2627{
   2628	struct amdgpu_device *adev = ring->adev;
   2629
   2630	return RREG32(mmCP_RB0_WPTR);
   2631}
   2632
   2633static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
   2634{
   2635	struct amdgpu_device *adev = ring->adev;
   2636
   2637	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
   2638	(void)RREG32(mmCP_RB0_WPTR);
   2639}
   2640
   2641static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
   2642{
   2643	/* XXX check if swapping is necessary on BE */
   2644	return *ring->wptr_cpu_addr;
   2645}
   2646
   2647static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
   2648{
   2649	struct amdgpu_device *adev = ring->adev;
   2650
   2651	/* XXX check if swapping is necessary on BE */
   2652	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
   2653	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
   2654}
   2655
   2656/**
   2657 * gfx_v7_0_cp_compute_enable - enable/disable the compute CP MEs
   2658 *
   2659 * @adev: amdgpu_device pointer
   2660 * @enable: enable or disable the MEs
   2661 *
   2662 * Halts or unhalts the compute MEs.
   2663 */
   2664static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
   2665{
   2666	if (enable)
   2667		WREG32(mmCP_MEC_CNTL, 0);
   2668	else
   2669		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
   2670				       CP_MEC_CNTL__MEC_ME2_HALT_MASK));
   2671	udelay(50);
   2672}
   2673
   2674/**
   2675 * gfx_v7_0_cp_compute_load_microcode - load the compute CP ME ucode
   2676 *
   2677 * @adev: amdgpu_device pointer
   2678 *
   2679 * Loads the compute MEC1&2 ucode.
   2680 * Returns 0 for success, -EINVAL if the ucode is not available.
   2681 */
   2682static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
   2683{
   2684	const struct gfx_firmware_header_v1_0 *mec_hdr;
   2685	const __le32 *fw_data;
   2686	unsigned i, fw_size;
   2687
   2688	if (!adev->gfx.mec_fw)
   2689		return -EINVAL;
   2690
   2691	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
   2692	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
   2693	adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
   2694	adev->gfx.mec_feature_version = le32_to_cpu(
   2695					mec_hdr->ucode_feature_version);
   2696
   2697	gfx_v7_0_cp_compute_enable(adev, false);
   2698
   2699	/* MEC1 */
   2700	fw_data = (const __le32 *)
   2701		(adev->gfx.mec_fw->data +
   2702		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
   2703	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
   2704	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
   2705	for (i = 0; i < fw_size; i++)
   2706		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
   2707	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
   2708
   2709	if (adev->asic_type == CHIP_KAVERI) {
   2710		const struct gfx_firmware_header_v1_0 *mec2_hdr;
   2711
   2712		if (!adev->gfx.mec2_fw)
   2713			return -EINVAL;
   2714
   2715		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
   2716		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
   2717		adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
   2718		adev->gfx.mec2_feature_version = le32_to_cpu(
   2719				mec2_hdr->ucode_feature_version);
   2720
   2721		/* MEC2 */
   2722		fw_data = (const __le32 *)
   2723			(adev->gfx.mec2_fw->data +
   2724			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
   2725		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
   2726		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
   2727		for (i = 0; i < fw_size; i++)
   2728			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
   2729		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
   2730	}
   2731
   2732	return 0;
   2733}
   2734
   2735/**
   2736 * gfx_v7_0_cp_compute_fini - stop the compute queues
   2737 *
   2738 * @adev: amdgpu_device pointer
   2739 *
   2740 * Stop the compute queues and tear down the driver queue
   2741 * info.
   2742 */
   2743static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
   2744{
   2745	int i;
   2746
   2747	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   2748		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
   2749
   2750		amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
   2751	}
   2752}
   2753
   2754static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
   2755{
   2756	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
   2757}
   2758
   2759static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
   2760{
   2761	int r;
   2762	u32 *hpd;
   2763	size_t mec_hpd_size;
   2764
   2765	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
   2766
   2767	/* take ownership of the relevant compute queues */
   2768	amdgpu_gfx_compute_queue_acquire(adev);
   2769
   2770	/* allocate space for ALL pipes (even the ones we don't own) */
   2771	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
   2772		* GFX7_MEC_HPD_SIZE * 2;
   2773
   2774	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
   2775				      AMDGPU_GEM_DOMAIN_VRAM,
   2776				      &adev->gfx.mec.hpd_eop_obj,
   2777				      &adev->gfx.mec.hpd_eop_gpu_addr,
   2778				      (void **)&hpd);
   2779	if (r) {
   2780		dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r);
   2781		gfx_v7_0_mec_fini(adev);
   2782		return r;
   2783	}
   2784
   2785	/* clear memory.  Not sure if this is required or not */
   2786	memset(hpd, 0, mec_hpd_size);
   2787
   2788	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
   2789	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
   2790
   2791	return 0;
   2792}
   2793
   2794struct hqd_registers
   2795{
   2796	u32 cp_mqd_base_addr;
   2797	u32 cp_mqd_base_addr_hi;
   2798	u32 cp_hqd_active;
   2799	u32 cp_hqd_vmid;
   2800	u32 cp_hqd_persistent_state;
   2801	u32 cp_hqd_pipe_priority;
   2802	u32 cp_hqd_queue_priority;
   2803	u32 cp_hqd_quantum;
   2804	u32 cp_hqd_pq_base;
   2805	u32 cp_hqd_pq_base_hi;
   2806	u32 cp_hqd_pq_rptr;
   2807	u32 cp_hqd_pq_rptr_report_addr;
   2808	u32 cp_hqd_pq_rptr_report_addr_hi;
   2809	u32 cp_hqd_pq_wptr_poll_addr;
   2810	u32 cp_hqd_pq_wptr_poll_addr_hi;
   2811	u32 cp_hqd_pq_doorbell_control;
   2812	u32 cp_hqd_pq_wptr;
   2813	u32 cp_hqd_pq_control;
   2814	u32 cp_hqd_ib_base_addr;
   2815	u32 cp_hqd_ib_base_addr_hi;
   2816	u32 cp_hqd_ib_rptr;
   2817	u32 cp_hqd_ib_control;
   2818	u32 cp_hqd_iq_timer;
   2819	u32 cp_hqd_iq_rptr;
   2820	u32 cp_hqd_dequeue_request;
   2821	u32 cp_hqd_dma_offload;
   2822	u32 cp_hqd_sema_cmd;
   2823	u32 cp_hqd_msg_type;
   2824	u32 cp_hqd_atomic0_preop_lo;
   2825	u32 cp_hqd_atomic0_preop_hi;
   2826	u32 cp_hqd_atomic1_preop_lo;
   2827	u32 cp_hqd_atomic1_preop_hi;
   2828	u32 cp_hqd_hq_scheduler0;
   2829	u32 cp_hqd_hq_scheduler1;
   2830	u32 cp_mqd_control;
   2831};
   2832
   2833static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
   2834				       int mec, int pipe)
   2835{
   2836	u64 eop_gpu_addr;
   2837	u32 tmp;
   2838	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
   2839			    * GFX7_MEC_HPD_SIZE * 2;
   2840
   2841	mutex_lock(&adev->srbm_mutex);
   2842	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
   2843
   2844	cik_srbm_select(adev, mec + 1, pipe, 0, 0);
   2845
   2846	/* write the EOP addr */
   2847	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
   2848	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
   2849
   2850	/* set the VMID assigned */
   2851	WREG32(mmCP_HPD_EOP_VMID, 0);
   2852
   2853	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   2854	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
   2855	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
   2856	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
   2857	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
   2858
   2859	cik_srbm_select(adev, 0, 0, 0, 0);
   2860	mutex_unlock(&adev->srbm_mutex);
   2861}
   2862
   2863static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
   2864{
   2865	int i;
   2866
   2867	/* disable the queue if it's active */
   2868	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
   2869		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
   2870		for (i = 0; i < adev->usec_timeout; i++) {
   2871			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
   2872				break;
   2873			udelay(1);
   2874		}
   2875
   2876		if (i == adev->usec_timeout)
   2877			return -ETIMEDOUT;
   2878
   2879		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
   2880		WREG32(mmCP_HQD_PQ_RPTR, 0);
   2881		WREG32(mmCP_HQD_PQ_WPTR, 0);
   2882	}
   2883
   2884	return 0;
   2885}
   2886
   2887static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
   2888			     struct cik_mqd *mqd,
   2889			     uint64_t mqd_gpu_addr,
   2890			     struct amdgpu_ring *ring)
   2891{
   2892	u64 hqd_gpu_addr;
   2893	u64 wb_gpu_addr;
   2894
   2895	/* init the mqd struct */
   2896	memset(mqd, 0, sizeof(struct cik_mqd));
   2897
   2898	mqd->header = 0xC0310800;
   2899	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
   2900	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
   2901	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
   2902	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
   2903
   2904	/* enable doorbell? */
   2905	mqd->cp_hqd_pq_doorbell_control =
   2906		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
   2907	if (ring->use_doorbell)
   2908		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
   2909	else
   2910		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
   2911
   2912	/* set the pointer to the MQD */
   2913	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
   2914	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
   2915
   2916	/* set MQD vmid to 0 */
   2917	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
   2918	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
   2919
   2920	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
   2921	hqd_gpu_addr = ring->gpu_addr >> 8;
   2922	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
   2923	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
   2924
   2925	/* set up the HQD, this is similar to CP_RB0_CNTL */
   2926	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
   2927	mqd->cp_hqd_pq_control &=
   2928		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
   2929				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
   2930
   2931	mqd->cp_hqd_pq_control |=
   2932		order_base_2(ring->ring_size / 8);
   2933	mqd->cp_hqd_pq_control |=
   2934		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
   2935#ifdef __BIG_ENDIAN
   2936	mqd->cp_hqd_pq_control |=
   2937		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
   2938#endif
   2939	mqd->cp_hqd_pq_control &=
   2940		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
   2941				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
   2942				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
   2943	mqd->cp_hqd_pq_control |=
   2944		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
   2945		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
   2946
   2947	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
   2948	wb_gpu_addr = ring->wptr_gpu_addr;
   2949	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
   2950	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   2951
   2952	/* set the wb address wether it's enabled or not */
   2953	wb_gpu_addr = ring->rptr_gpu_addr;
   2954	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
   2955	mqd->cp_hqd_pq_rptr_report_addr_hi =
   2956		upper_32_bits(wb_gpu_addr) & 0xffff;
   2957
   2958	/* enable the doorbell if requested */
   2959	if (ring->use_doorbell) {
   2960		mqd->cp_hqd_pq_doorbell_control =
   2961			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
   2962		mqd->cp_hqd_pq_doorbell_control &=
   2963			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
   2964		mqd->cp_hqd_pq_doorbell_control |=
   2965			(ring->doorbell_index <<
   2966			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
   2967		mqd->cp_hqd_pq_doorbell_control |=
   2968			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
   2969		mqd->cp_hqd_pq_doorbell_control &=
   2970			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
   2971					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
   2972
   2973	} else {
   2974		mqd->cp_hqd_pq_doorbell_control = 0;
   2975	}
   2976
   2977	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   2978	ring->wptr = 0;
   2979	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
   2980	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
   2981
   2982	/* set the vmid for the queue */
   2983	mqd->cp_hqd_vmid = 0;
   2984
   2985	/* defaults */
   2986	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
   2987	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
   2988	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
   2989	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
   2990	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
   2991	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
   2992	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
   2993	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
   2994	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
   2995	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
   2996	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
   2997	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
   2998	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
   2999	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
   3000	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
   3001	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
   3002
   3003	/* activate the queue */
   3004	mqd->cp_hqd_active = 1;
   3005}
   3006
   3007static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
   3008{
   3009	uint32_t tmp;
   3010	uint32_t mqd_reg;
   3011	uint32_t *mqd_data;
   3012
   3013	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
   3014	mqd_data = &mqd->cp_mqd_base_addr_lo;
   3015
   3016	/* disable wptr polling */
   3017	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
   3018	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
   3019	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
   3020
   3021	/* program all HQD registers */
   3022	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
   3023		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
   3024
   3025	/* activate the HQD */
   3026	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
   3027		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
   3028
   3029	return 0;
   3030}
   3031
   3032static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
   3033{
   3034	int r;
   3035	u64 mqd_gpu_addr;
   3036	struct cik_mqd *mqd;
   3037	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
   3038
   3039	r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE,
   3040				      AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
   3041				      &mqd_gpu_addr, (void **)&mqd);
   3042	if (r) {
   3043		dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
   3044		return r;
   3045	}
   3046
   3047	mutex_lock(&adev->srbm_mutex);
   3048	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
   3049
   3050	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
   3051	gfx_v7_0_mqd_deactivate(adev);
   3052	gfx_v7_0_mqd_commit(adev, mqd);
   3053
   3054	cik_srbm_select(adev, 0, 0, 0, 0);
   3055	mutex_unlock(&adev->srbm_mutex);
   3056
   3057	amdgpu_bo_kunmap(ring->mqd_obj);
   3058	amdgpu_bo_unreserve(ring->mqd_obj);
   3059	return 0;
   3060}
   3061
   3062/**
   3063 * gfx_v7_0_cp_compute_resume - setup the compute queue registers
   3064 *
   3065 * @adev: amdgpu_device pointer
   3066 *
   3067 * Program the compute queues and test them to make sure they
   3068 * are working.
   3069 * Returns 0 for success, error for failure.
   3070 */
   3071static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
   3072{
   3073	int r, i, j;
   3074	u32 tmp;
   3075	struct amdgpu_ring *ring;
   3076
   3077	/* fix up chicken bits */
   3078	tmp = RREG32(mmCP_CPF_DEBUG);
   3079	tmp |= (1 << 23);
   3080	WREG32(mmCP_CPF_DEBUG, tmp);
   3081
   3082	/* init all pipes (even the ones we don't own) */
   3083	for (i = 0; i < adev->gfx.mec.num_mec; i++)
   3084		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
   3085			gfx_v7_0_compute_pipe_init(adev, i, j);
   3086
   3087	/* init the queues */
   3088	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   3089		r = gfx_v7_0_compute_queue_init(adev, i);
   3090		if (r) {
   3091			gfx_v7_0_cp_compute_fini(adev);
   3092			return r;
   3093		}
   3094	}
   3095
   3096	gfx_v7_0_cp_compute_enable(adev, true);
   3097
   3098	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   3099		ring = &adev->gfx.compute_ring[i];
   3100		amdgpu_ring_test_helper(ring);
   3101	}
   3102
   3103	return 0;
   3104}
   3105
   3106static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
   3107{
   3108	gfx_v7_0_cp_gfx_enable(adev, enable);
   3109	gfx_v7_0_cp_compute_enable(adev, enable);
   3110}
   3111
   3112static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
   3113{
   3114	int r;
   3115
   3116	r = gfx_v7_0_cp_gfx_load_microcode(adev);
   3117	if (r)
   3118		return r;
   3119	r = gfx_v7_0_cp_compute_load_microcode(adev);
   3120	if (r)
   3121		return r;
   3122
   3123	return 0;
   3124}
   3125
   3126static void gfx_v7_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
   3127					       bool enable)
   3128{
   3129	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
   3130
   3131	if (enable)
   3132		tmp |= (CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
   3133				CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
   3134	else
   3135		tmp &= ~(CP_INT_CNTL_RING0__CNTX_BUSY_INT_ENABLE_MASK |
   3136				CP_INT_CNTL_RING0__CNTX_EMPTY_INT_ENABLE_MASK);
   3137	WREG32(mmCP_INT_CNTL_RING0, tmp);
   3138}
   3139
   3140static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
   3141{
   3142	int r;
   3143
   3144	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
   3145
   3146	r = gfx_v7_0_cp_load_microcode(adev);
   3147	if (r)
   3148		return r;
   3149
   3150	r = gfx_v7_0_cp_gfx_resume(adev);
   3151	if (r)
   3152		return r;
   3153	r = gfx_v7_0_cp_compute_resume(adev);
   3154	if (r)
   3155		return r;
   3156
   3157	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
   3158
   3159	return 0;
   3160}
   3161
   3162/**
   3163 * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP
   3164 *
   3165 * @ring: the ring to emit the commands to
   3166 *
   3167 * Sync the command pipeline with the PFP. E.g. wait for everything
   3168 * to be completed.
   3169 */
   3170static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
   3171{
   3172	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   3173	uint32_t seq = ring->fence_drv.sync_seq;
   3174	uint64_t addr = ring->fence_drv.gpu_addr;
   3175
   3176	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   3177	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
   3178				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
   3179				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
   3180	amdgpu_ring_write(ring, addr & 0xfffffffc);
   3181	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
   3182	amdgpu_ring_write(ring, seq);
   3183	amdgpu_ring_write(ring, 0xffffffff);
   3184	amdgpu_ring_write(ring, 4); /* poll interval */
   3185
   3186	if (usepfp) {
   3187		/* sync CE with ME to prevent CE fetch CEIB before context switch done */
   3188		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   3189		amdgpu_ring_write(ring, 0);
   3190		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   3191		amdgpu_ring_write(ring, 0);
   3192	}
   3193}
   3194
   3195/*
   3196 * vm
   3197 * VMID 0 is the physical GPU addresses as used by the kernel.
   3198 * VMIDs 1-15 are used for userspace clients and are handled
   3199 * by the amdgpu vm/hsa code.
   3200 */
   3201/**
   3202 * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
   3203 *
   3204 * @ring: amdgpu_ring pointer
   3205 * @vmid: vmid number to use
   3206 * @pd_addr: address
   3207 *
   3208 * Update the page table base and flush the VM TLB
   3209 * using the CP (CIK).
   3210 */
   3211static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
   3212					unsigned vmid, uint64_t pd_addr)
   3213{
   3214	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   3215
   3216	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
   3217
   3218	/* wait for the invalidate to complete */
   3219	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   3220	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
   3221				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
   3222				 WAIT_REG_MEM_ENGINE(0))); /* me */
   3223	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
   3224	amdgpu_ring_write(ring, 0);
   3225	amdgpu_ring_write(ring, 0); /* ref */
   3226	amdgpu_ring_write(ring, 0); /* mask */
   3227	amdgpu_ring_write(ring, 0x20); /* poll interval */
   3228
   3229	/* compute doesn't have PFP */
   3230	if (usepfp) {
   3231		/* sync PFP to ME, otherwise we might get invalid PFP reads */
   3232		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   3233		amdgpu_ring_write(ring, 0x0);
   3234
   3235		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
   3236		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   3237		amdgpu_ring_write(ring, 0);
   3238		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   3239		amdgpu_ring_write(ring, 0);
   3240	}
   3241}
   3242
   3243static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
   3244				    uint32_t reg, uint32_t val)
   3245{
   3246	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
   3247
   3248	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   3249	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   3250				 WRITE_DATA_DST_SEL(0)));
   3251	amdgpu_ring_write(ring, reg);
   3252	amdgpu_ring_write(ring, 0);
   3253	amdgpu_ring_write(ring, val);
   3254}
   3255
   3256/*
   3257 * RLC
   3258 * The RLC is a multi-purpose microengine that handles a
   3259 * variety of functions.
   3260 */
   3261static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
   3262{
   3263	const u32 *src_ptr;
   3264	u32 dws;
   3265	const struct cs_section_def *cs_data;
   3266	int r;
   3267
   3268	/* allocate rlc buffers */
   3269	if (adev->flags & AMD_IS_APU) {
   3270		if (adev->asic_type == CHIP_KAVERI) {
   3271			adev->gfx.rlc.reg_list = spectre_rlc_save_restore_register_list;
   3272			adev->gfx.rlc.reg_list_size =
   3273				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
   3274		} else {
   3275			adev->gfx.rlc.reg_list = kalindi_rlc_save_restore_register_list;
   3276			adev->gfx.rlc.reg_list_size =
   3277				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
   3278		}
   3279	}
   3280	adev->gfx.rlc.cs_data = ci_cs_data;
   3281	adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
   3282	adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
   3283
   3284	src_ptr = adev->gfx.rlc.reg_list;
   3285	dws = adev->gfx.rlc.reg_list_size;
   3286	dws += (5 * 16) + 48 + 48 + 64;
   3287
   3288	cs_data = adev->gfx.rlc.cs_data;
   3289
   3290	if (src_ptr) {
   3291		/* init save restore block */
   3292		r = amdgpu_gfx_rlc_init_sr(adev, dws);
   3293		if (r)
   3294			return r;
   3295	}
   3296
   3297	if (cs_data) {
   3298		/* init clear state block */
   3299		r = amdgpu_gfx_rlc_init_csb(adev);
   3300		if (r)
   3301			return r;
   3302	}
   3303
   3304	if (adev->gfx.rlc.cp_table_size) {
   3305		r = amdgpu_gfx_rlc_init_cpt(adev);
   3306		if (r)
   3307			return r;
   3308	}
   3309
   3310	/* init spm vmid with 0xf */
   3311	if (adev->gfx.rlc.funcs->update_spm_vmid)
   3312		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
   3313
   3314	return 0;
   3315}
   3316
   3317static void gfx_v7_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
   3318{
   3319	u32 tmp;
   3320
   3321	tmp = RREG32(mmRLC_LB_CNTL);
   3322	if (enable)
   3323		tmp |= RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
   3324	else
   3325		tmp &= ~RLC_LB_CNTL__LOAD_BALANCE_ENABLE_MASK;
   3326	WREG32(mmRLC_LB_CNTL, tmp);
   3327}
   3328
   3329static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
   3330{
   3331	u32 i, j, k;
   3332	u32 mask;
   3333
   3334	mutex_lock(&adev->grbm_idx_mutex);
   3335	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   3336		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   3337			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
   3338			for (k = 0; k < adev->usec_timeout; k++) {
   3339				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
   3340					break;
   3341				udelay(1);
   3342			}
   3343		}
   3344	}
   3345	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3346	mutex_unlock(&adev->grbm_idx_mutex);
   3347
   3348	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
   3349		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
   3350		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
   3351		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
   3352	for (k = 0; k < adev->usec_timeout; k++) {
   3353		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
   3354			break;
   3355		udelay(1);
   3356	}
   3357}
   3358
   3359static void gfx_v7_0_update_rlc(struct amdgpu_device *adev, u32 rlc)
   3360{
   3361	u32 tmp;
   3362
   3363	tmp = RREG32(mmRLC_CNTL);
   3364	if (tmp != rlc)
   3365		WREG32(mmRLC_CNTL, rlc);
   3366}
   3367
   3368static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
   3369{
   3370	u32 data, orig;
   3371
   3372	orig = data = RREG32(mmRLC_CNTL);
   3373
   3374	if (data & RLC_CNTL__RLC_ENABLE_F32_MASK) {
   3375		u32 i;
   3376
   3377		data &= ~RLC_CNTL__RLC_ENABLE_F32_MASK;
   3378		WREG32(mmRLC_CNTL, data);
   3379
   3380		for (i = 0; i < adev->usec_timeout; i++) {
   3381			if ((RREG32(mmRLC_GPM_STAT) & RLC_GPM_STAT__RLC_BUSY_MASK) == 0)
   3382				break;
   3383			udelay(1);
   3384		}
   3385
   3386		gfx_v7_0_wait_for_rlc_serdes(adev);
   3387	}
   3388
   3389	return orig;
   3390}
   3391
   3392static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
   3393{
   3394	return true;
   3395}
   3396
   3397static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
   3398{
   3399	u32 tmp, i, mask;
   3400
   3401	tmp = 0x1 | (1 << 1);
   3402	WREG32(mmRLC_GPR_REG2, tmp);
   3403
   3404	mask = RLC_GPM_STAT__GFX_POWER_STATUS_MASK |
   3405		RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK;
   3406	for (i = 0; i < adev->usec_timeout; i++) {
   3407		if ((RREG32(mmRLC_GPM_STAT) & mask) == mask)
   3408			break;
   3409		udelay(1);
   3410	}
   3411
   3412	for (i = 0; i < adev->usec_timeout; i++) {
   3413		if ((RREG32(mmRLC_GPR_REG2) & 0x1) == 0)
   3414			break;
   3415		udelay(1);
   3416	}
   3417}
   3418
   3419static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
   3420{
   3421	u32 tmp;
   3422
   3423	tmp = 0x1 | (0 << 1);
   3424	WREG32(mmRLC_GPR_REG2, tmp);
   3425}
   3426
   3427/**
   3428 * gfx_v7_0_rlc_stop - stop the RLC ME
   3429 *
   3430 * @adev: amdgpu_device pointer
   3431 *
   3432 * Halt the RLC ME (MicroEngine) (CIK).
   3433 */
   3434static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
   3435{
   3436	WREG32(mmRLC_CNTL, 0);
   3437
   3438	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
   3439
   3440	gfx_v7_0_wait_for_rlc_serdes(adev);
   3441}
   3442
   3443/**
   3444 * gfx_v7_0_rlc_start - start the RLC ME
   3445 *
   3446 * @adev: amdgpu_device pointer
   3447 *
   3448 * Unhalt the RLC ME (MicroEngine) (CIK).
   3449 */
   3450static void gfx_v7_0_rlc_start(struct amdgpu_device *adev)
   3451{
   3452	WREG32(mmRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
   3453
   3454	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
   3455
   3456	udelay(50);
   3457}
   3458
   3459static void gfx_v7_0_rlc_reset(struct amdgpu_device *adev)
   3460{
   3461	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
   3462
   3463	tmp |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
   3464	WREG32(mmGRBM_SOFT_RESET, tmp);
   3465	udelay(50);
   3466	tmp &= ~GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
   3467	WREG32(mmGRBM_SOFT_RESET, tmp);
   3468	udelay(50);
   3469}
   3470
   3471/**
   3472 * gfx_v7_0_rlc_resume - setup the RLC hw
   3473 *
   3474 * @adev: amdgpu_device pointer
   3475 *
   3476 * Initialize the RLC registers, load the ucode,
   3477 * and start the RLC (CIK).
   3478 * Returns 0 for success, -EINVAL if the ucode is not available.
   3479 */
   3480static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
   3481{
   3482	const struct rlc_firmware_header_v1_0 *hdr;
   3483	const __le32 *fw_data;
   3484	unsigned i, fw_size;
   3485	u32 tmp;
   3486
   3487	if (!adev->gfx.rlc_fw)
   3488		return -EINVAL;
   3489
   3490	hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
   3491	amdgpu_ucode_print_rlc_hdr(&hdr->header);
   3492	adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
   3493	adev->gfx.rlc_feature_version = le32_to_cpu(
   3494					hdr->ucode_feature_version);
   3495
   3496	adev->gfx.rlc.funcs->stop(adev);
   3497
   3498	/* disable CG */
   3499	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
   3500	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
   3501
   3502	adev->gfx.rlc.funcs->reset(adev);
   3503
   3504	gfx_v7_0_init_pg(adev);
   3505
   3506	WREG32(mmRLC_LB_CNTR_INIT, 0);
   3507	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
   3508
   3509	mutex_lock(&adev->grbm_idx_mutex);
   3510	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3511	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
   3512	WREG32(mmRLC_LB_PARAMS, 0x00600408);
   3513	WREG32(mmRLC_LB_CNTL, 0x80000004);
   3514	mutex_unlock(&adev->grbm_idx_mutex);
   3515
   3516	WREG32(mmRLC_MC_CNTL, 0);
   3517	WREG32(mmRLC_UCODE_CNTL, 0);
   3518
   3519	fw_data = (const __le32 *)
   3520		(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
   3521	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
   3522	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
   3523	for (i = 0; i < fw_size; i++)
   3524		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
   3525	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
   3526
   3527	/* XXX - find out what chips support lbpw */
   3528	gfx_v7_0_enable_lbpw(adev, false);
   3529
   3530	if (adev->asic_type == CHIP_BONAIRE)
   3531		WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
   3532
   3533	adev->gfx.rlc.funcs->start(adev);
   3534
   3535	return 0;
   3536}
   3537
   3538static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
   3539{
   3540	u32 data;
   3541
   3542	amdgpu_gfx_off_ctrl(adev, false);
   3543
   3544	data = RREG32(mmRLC_SPM_VMID);
   3545
   3546	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
   3547	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
   3548
   3549	WREG32(mmRLC_SPM_VMID, data);
   3550
   3551	amdgpu_gfx_off_ctrl(adev, true);
   3552}
   3553
   3554static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
   3555{
   3556	u32 data, orig, tmp, tmp2;
   3557
   3558	orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
   3559
   3560	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
   3561		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
   3562
   3563		tmp = gfx_v7_0_halt_rlc(adev);
   3564
   3565		mutex_lock(&adev->grbm_idx_mutex);
   3566		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3567		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
   3568		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
   3569		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
   3570			RLC_SERDES_WR_CTRL__CGCG_OVERRIDE_0_MASK |
   3571			RLC_SERDES_WR_CTRL__CGLS_ENABLE_MASK;
   3572		WREG32(mmRLC_SERDES_WR_CTRL, tmp2);
   3573		mutex_unlock(&adev->grbm_idx_mutex);
   3574
   3575		gfx_v7_0_update_rlc(adev, tmp);
   3576
   3577		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
   3578		if (orig != data)
   3579			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
   3580
   3581	} else {
   3582		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
   3583
   3584		RREG32(mmCB_CGTT_SCLK_CTRL);
   3585		RREG32(mmCB_CGTT_SCLK_CTRL);
   3586		RREG32(mmCB_CGTT_SCLK_CTRL);
   3587		RREG32(mmCB_CGTT_SCLK_CTRL);
   3588
   3589		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
   3590		if (orig != data)
   3591			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
   3592
   3593		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
   3594	}
   3595}
   3596
   3597static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
   3598{
   3599	u32 data, orig, tmp = 0;
   3600
   3601	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
   3602		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
   3603			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
   3604				orig = data = RREG32(mmCP_MEM_SLP_CNTL);
   3605				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
   3606				if (orig != data)
   3607					WREG32(mmCP_MEM_SLP_CNTL, data);
   3608			}
   3609		}
   3610
   3611		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   3612		data |= 0x00000001;
   3613		data &= 0xfffffffd;
   3614		if (orig != data)
   3615			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
   3616
   3617		tmp = gfx_v7_0_halt_rlc(adev);
   3618
   3619		mutex_lock(&adev->grbm_idx_mutex);
   3620		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3621		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
   3622		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
   3623		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
   3624			RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_0_MASK;
   3625		WREG32(mmRLC_SERDES_WR_CTRL, data);
   3626		mutex_unlock(&adev->grbm_idx_mutex);
   3627
   3628		gfx_v7_0_update_rlc(adev, tmp);
   3629
   3630		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
   3631			orig = data = RREG32(mmCGTS_SM_CTRL_REG);
   3632			data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
   3633			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
   3634			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
   3635			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
   3636			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
   3637			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
   3638				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
   3639			data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
   3640			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
   3641			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
   3642			if (orig != data)
   3643				WREG32(mmCGTS_SM_CTRL_REG, data);
   3644		}
   3645	} else {
   3646		orig = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
   3647		data |= 0x00000003;
   3648		if (orig != data)
   3649			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
   3650
   3651		data = RREG32(mmRLC_MEM_SLP_CNTL);
   3652		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
   3653			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
   3654			WREG32(mmRLC_MEM_SLP_CNTL, data);
   3655		}
   3656
   3657		data = RREG32(mmCP_MEM_SLP_CNTL);
   3658		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
   3659			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
   3660			WREG32(mmCP_MEM_SLP_CNTL, data);
   3661		}
   3662
   3663		orig = data = RREG32(mmCGTS_SM_CTRL_REG);
   3664		data |= CGTS_SM_CTRL_REG__OVERRIDE_MASK | CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
   3665		if (orig != data)
   3666			WREG32(mmCGTS_SM_CTRL_REG, data);
   3667
   3668		tmp = gfx_v7_0_halt_rlc(adev);
   3669
   3670		mutex_lock(&adev->grbm_idx_mutex);
   3671		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   3672		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
   3673		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
   3674		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
   3675		WREG32(mmRLC_SERDES_WR_CTRL, data);
   3676		mutex_unlock(&adev->grbm_idx_mutex);
   3677
   3678		gfx_v7_0_update_rlc(adev, tmp);
   3679	}
   3680}
   3681
   3682static void gfx_v7_0_update_cg(struct amdgpu_device *adev,
   3683			       bool enable)
   3684{
   3685	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
   3686	/* order matters! */
   3687	if (enable) {
   3688		gfx_v7_0_enable_mgcg(adev, true);
   3689		gfx_v7_0_enable_cgcg(adev, true);
   3690	} else {
   3691		gfx_v7_0_enable_cgcg(adev, false);
   3692		gfx_v7_0_enable_mgcg(adev, false);
   3693	}
   3694	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
   3695}
   3696
   3697static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
   3698						bool enable)
   3699{
   3700	u32 data, orig;
   3701
   3702	orig = data = RREG32(mmRLC_PG_CNTL);
   3703	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
   3704		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
   3705	else
   3706		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
   3707	if (orig != data)
   3708		WREG32(mmRLC_PG_CNTL, data);
   3709}
   3710
   3711static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
   3712						bool enable)
   3713{
   3714	u32 data, orig;
   3715
   3716	orig = data = RREG32(mmRLC_PG_CNTL);
   3717	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
   3718		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
   3719	else
   3720		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
   3721	if (orig != data)
   3722		WREG32(mmRLC_PG_CNTL, data);
   3723}
   3724
   3725static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
   3726{
   3727	u32 data, orig;
   3728
   3729	orig = data = RREG32(mmRLC_PG_CNTL);
   3730	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
   3731		data &= ~0x8000;
   3732	else
   3733		data |= 0x8000;
   3734	if (orig != data)
   3735		WREG32(mmRLC_PG_CNTL, data);
   3736}
   3737
   3738static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
   3739{
   3740	u32 data, orig;
   3741
   3742	orig = data = RREG32(mmRLC_PG_CNTL);
   3743	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
   3744		data &= ~0x2000;
   3745	else
   3746		data |= 0x2000;
   3747	if (orig != data)
   3748		WREG32(mmRLC_PG_CNTL, data);
   3749}
   3750
   3751static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
   3752{
   3753	if (adev->asic_type == CHIP_KAVERI)
   3754		return 5;
   3755	else
   3756		return 4;
   3757}
   3758
   3759static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
   3760				     bool enable)
   3761{
   3762	u32 data, orig;
   3763
   3764	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
   3765		orig = data = RREG32(mmRLC_PG_CNTL);
   3766		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
   3767		if (orig != data)
   3768			WREG32(mmRLC_PG_CNTL, data);
   3769
   3770		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
   3771		data |= RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
   3772		if (orig != data)
   3773			WREG32(mmRLC_AUTO_PG_CTRL, data);
   3774	} else {
   3775		orig = data = RREG32(mmRLC_PG_CNTL);
   3776		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
   3777		if (orig != data)
   3778			WREG32(mmRLC_PG_CNTL, data);
   3779
   3780		orig = data = RREG32(mmRLC_AUTO_PG_CTRL);
   3781		data &= ~RLC_AUTO_PG_CTRL__AUTO_PG_EN_MASK;
   3782		if (orig != data)
   3783			WREG32(mmRLC_AUTO_PG_CTRL, data);
   3784
   3785		data = RREG32(mmDB_RENDER_CONTROL);
   3786	}
   3787}
   3788
   3789static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
   3790						 u32 bitmap)
   3791{
   3792	u32 data;
   3793
   3794	if (!bitmap)
   3795		return;
   3796
   3797	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
   3798	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
   3799
   3800	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
   3801}
   3802
   3803static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
   3804{
   3805	u32 data, mask;
   3806
   3807	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
   3808	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
   3809
   3810	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
   3811	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
   3812
   3813	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
   3814
   3815	return (~data) & mask;
   3816}
   3817
   3818static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
   3819{
   3820	u32 tmp;
   3821
   3822	WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
   3823
   3824	tmp = RREG32(mmRLC_MAX_PG_CU);
   3825	tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
   3826	tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
   3827	WREG32(mmRLC_MAX_PG_CU, tmp);
   3828}
   3829
   3830static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
   3831					    bool enable)
   3832{
   3833	u32 data, orig;
   3834
   3835	orig = data = RREG32(mmRLC_PG_CNTL);
   3836	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
   3837		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
   3838	else
   3839		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
   3840	if (orig != data)
   3841		WREG32(mmRLC_PG_CNTL, data);
   3842}
   3843
   3844static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
   3845					     bool enable)
   3846{
   3847	u32 data, orig;
   3848
   3849	orig = data = RREG32(mmRLC_PG_CNTL);
   3850	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
   3851		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
   3852	else
   3853		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
   3854	if (orig != data)
   3855		WREG32(mmRLC_PG_CNTL, data);
   3856}
   3857
   3858#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
   3859#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
   3860
   3861static void gfx_v7_0_init_gfx_cgpg(struct amdgpu_device *adev)
   3862{
   3863	u32 data, orig;
   3864	u32 i;
   3865
   3866	if (adev->gfx.rlc.cs_data) {
   3867		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
   3868		WREG32(mmRLC_GPM_SCRATCH_DATA, upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
   3869		WREG32(mmRLC_GPM_SCRATCH_DATA, lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
   3870		WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.clear_state_size);
   3871	} else {
   3872		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
   3873		for (i = 0; i < 3; i++)
   3874			WREG32(mmRLC_GPM_SCRATCH_DATA, 0);
   3875	}
   3876	if (adev->gfx.rlc.reg_list) {
   3877		WREG32(mmRLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
   3878		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
   3879			WREG32(mmRLC_GPM_SCRATCH_DATA, adev->gfx.rlc.reg_list[i]);
   3880	}
   3881
   3882	orig = data = RREG32(mmRLC_PG_CNTL);
   3883	data |= RLC_PG_CNTL__GFX_POWER_GATING_SRC_MASK;
   3884	if (orig != data)
   3885		WREG32(mmRLC_PG_CNTL, data);
   3886
   3887	WREG32(mmRLC_SAVE_AND_RESTORE_BASE, adev->gfx.rlc.save_restore_gpu_addr >> 8);
   3888	WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
   3889
   3890	data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
   3891	data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
   3892	data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
   3893	WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
   3894
   3895	data = 0x10101010;
   3896	WREG32(mmRLC_PG_DELAY, data);
   3897
   3898	data = RREG32(mmRLC_PG_DELAY_2);
   3899	data &= ~0xff;
   3900	data |= 0x3;
   3901	WREG32(mmRLC_PG_DELAY_2, data);
   3902
   3903	data = RREG32(mmRLC_AUTO_PG_CTRL);
   3904	data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
   3905	data |= (0x700 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
   3906	WREG32(mmRLC_AUTO_PG_CTRL, data);
   3907
   3908}
   3909
   3910static void gfx_v7_0_update_gfx_pg(struct amdgpu_device *adev, bool enable)
   3911{
   3912	gfx_v7_0_enable_gfx_cgpg(adev, enable);
   3913	gfx_v7_0_enable_gfx_static_mgpg(adev, enable);
   3914	gfx_v7_0_enable_gfx_dynamic_mgpg(adev, enable);
   3915}
   3916
   3917static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
   3918{
   3919	u32 count = 0;
   3920	const struct cs_section_def *sect = NULL;
   3921	const struct cs_extent_def *ext = NULL;
   3922
   3923	if (adev->gfx.rlc.cs_data == NULL)
   3924		return 0;
   3925
   3926	/* begin clear state */
   3927	count += 2;
   3928	/* context control state */
   3929	count += 3;
   3930
   3931	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
   3932		for (ext = sect->section; ext->extent != NULL; ++ext) {
   3933			if (sect->id == SECT_CONTEXT)
   3934				count += 2 + ext->reg_count;
   3935			else
   3936				return 0;
   3937		}
   3938	}
   3939	/* pa_sc_raster_config/pa_sc_raster_config1 */
   3940	count += 4;
   3941	/* end clear state */
   3942	count += 2;
   3943	/* clear state */
   3944	count += 2;
   3945
   3946	return count;
   3947}
   3948
   3949static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
   3950				    volatile u32 *buffer)
   3951{
   3952	u32 count = 0, i;
   3953	const struct cs_section_def *sect = NULL;
   3954	const struct cs_extent_def *ext = NULL;
   3955
   3956	if (adev->gfx.rlc.cs_data == NULL)
   3957		return;
   3958	if (buffer == NULL)
   3959		return;
   3960
   3961	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   3962	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   3963
   3964	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   3965	buffer[count++] = cpu_to_le32(0x80000000);
   3966	buffer[count++] = cpu_to_le32(0x80000000);
   3967
   3968	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
   3969		for (ext = sect->section; ext->extent != NULL; ++ext) {
   3970			if (sect->id == SECT_CONTEXT) {
   3971				buffer[count++] =
   3972					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
   3973				buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
   3974				for (i = 0; i < ext->reg_count; i++)
   3975					buffer[count++] = cpu_to_le32(ext->extent[i]);
   3976			} else {
   3977				return;
   3978			}
   3979		}
   3980	}
   3981
   3982	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   3983	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
   3984	switch (adev->asic_type) {
   3985	case CHIP_BONAIRE:
   3986		buffer[count++] = cpu_to_le32(0x16000012);
   3987		buffer[count++] = cpu_to_le32(0x00000000);
   3988		break;
   3989	case CHIP_KAVERI:
   3990		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
   3991		buffer[count++] = cpu_to_le32(0x00000000);
   3992		break;
   3993	case CHIP_KABINI:
   3994	case CHIP_MULLINS:
   3995		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
   3996		buffer[count++] = cpu_to_le32(0x00000000);
   3997		break;
   3998	case CHIP_HAWAII:
   3999		buffer[count++] = cpu_to_le32(0x3a00161a);
   4000		buffer[count++] = cpu_to_le32(0x0000002e);
   4001		break;
   4002	default:
   4003		buffer[count++] = cpu_to_le32(0x00000000);
   4004		buffer[count++] = cpu_to_le32(0x00000000);
   4005		break;
   4006	}
   4007
   4008	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   4009	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
   4010
   4011	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
   4012	buffer[count++] = cpu_to_le32(0);
   4013}
   4014
   4015static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
   4016{
   4017	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
   4018			      AMD_PG_SUPPORT_GFX_SMG |
   4019			      AMD_PG_SUPPORT_GFX_DMG |
   4020			      AMD_PG_SUPPORT_CP |
   4021			      AMD_PG_SUPPORT_GDS |
   4022			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
   4023		gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
   4024		gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
   4025		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
   4026			gfx_v7_0_init_gfx_cgpg(adev);
   4027			gfx_v7_0_enable_cp_pg(adev, true);
   4028			gfx_v7_0_enable_gds_pg(adev, true);
   4029		}
   4030		gfx_v7_0_init_ao_cu_mask(adev);
   4031		gfx_v7_0_update_gfx_pg(adev, true);
   4032	}
   4033}
   4034
   4035static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
   4036{
   4037	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
   4038			      AMD_PG_SUPPORT_GFX_SMG |
   4039			      AMD_PG_SUPPORT_GFX_DMG |
   4040			      AMD_PG_SUPPORT_CP |
   4041			      AMD_PG_SUPPORT_GDS |
   4042			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
   4043		gfx_v7_0_update_gfx_pg(adev, false);
   4044		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
   4045			gfx_v7_0_enable_cp_pg(adev, false);
   4046			gfx_v7_0_enable_gds_pg(adev, false);
   4047		}
   4048	}
   4049}
   4050
   4051/**
   4052 * gfx_v7_0_get_gpu_clock_counter - return GPU clock counter snapshot
   4053 *
   4054 * @adev: amdgpu_device pointer
   4055 *
   4056 * Fetches a GPU clock counter snapshot (SI).
   4057 * Returns the 64 bit clock counter snapshot.
   4058 */
   4059static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
   4060{
   4061	uint64_t clock;
   4062
   4063	mutex_lock(&adev->gfx.gpu_clock_mutex);
   4064	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
   4065	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
   4066		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
   4067	mutex_unlock(&adev->gfx.gpu_clock_mutex);
   4068	return clock;
   4069}
   4070
   4071static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
   4072					  uint32_t vmid,
   4073					  uint32_t gds_base, uint32_t gds_size,
   4074					  uint32_t gws_base, uint32_t gws_size,
   4075					  uint32_t oa_base, uint32_t oa_size)
   4076{
   4077	/* GDS Base */
   4078	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   4079	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   4080				WRITE_DATA_DST_SEL(0)));
   4081	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
   4082	amdgpu_ring_write(ring, 0);
   4083	amdgpu_ring_write(ring, gds_base);
   4084
   4085	/* GDS Size */
   4086	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   4087	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   4088				WRITE_DATA_DST_SEL(0)));
   4089	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
   4090	amdgpu_ring_write(ring, 0);
   4091	amdgpu_ring_write(ring, gds_size);
   4092
   4093	/* GWS */
   4094	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   4095	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   4096				WRITE_DATA_DST_SEL(0)));
   4097	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
   4098	amdgpu_ring_write(ring, 0);
   4099	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
   4100
   4101	/* OA */
   4102	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   4103	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
   4104				WRITE_DATA_DST_SEL(0)));
   4105	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
   4106	amdgpu_ring_write(ring, 0);
   4107	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
   4108}
   4109
   4110static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
   4111{
   4112	struct amdgpu_device *adev = ring->adev;
   4113	uint32_t value = 0;
   4114
   4115	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
   4116	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
   4117	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
   4118	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
   4119	WREG32(mmSQ_CMD, value);
   4120}
   4121
   4122static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
   4123{
   4124	WREG32(mmSQ_IND_INDEX,
   4125		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   4126		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
   4127		(address << SQ_IND_INDEX__INDEX__SHIFT) |
   4128		(SQ_IND_INDEX__FORCE_READ_MASK));
   4129	return RREG32(mmSQ_IND_DATA);
   4130}
   4131
   4132static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
   4133			   uint32_t wave, uint32_t thread,
   4134			   uint32_t regno, uint32_t num, uint32_t *out)
   4135{
   4136	WREG32(mmSQ_IND_INDEX,
   4137		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
   4138		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
   4139		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
   4140		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
   4141		(SQ_IND_INDEX__FORCE_READ_MASK) |
   4142		(SQ_IND_INDEX__AUTO_INCR_MASK));
   4143	while (num--)
   4144		*(out++) = RREG32(mmSQ_IND_DATA);
   4145}
   4146
   4147static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
   4148{
   4149	/* type 0 wave data */
   4150	dst[(*no_fields)++] = 0;
   4151	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
   4152	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
   4153	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
   4154	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
   4155	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
   4156	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
   4157	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
   4158	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
   4159	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
   4160	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
   4161	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
   4162	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
   4163	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
   4164	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
   4165	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
   4166	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
   4167	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
   4168	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
   4169	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
   4170}
   4171
   4172static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
   4173				     uint32_t wave, uint32_t start,
   4174				     uint32_t size, uint32_t *dst)
   4175{
   4176	wave_read_regs(
   4177		adev, simd, wave, 0,
   4178		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
   4179}
   4180
   4181static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
   4182				  u32 me, u32 pipe, u32 q, u32 vm)
   4183{
   4184	cik_srbm_select(adev, me, pipe, q, vm);
   4185}
   4186
   4187static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
   4188	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
   4189	.select_se_sh = &gfx_v7_0_select_se_sh,
   4190	.read_wave_data = &gfx_v7_0_read_wave_data,
   4191	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
   4192	.select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
   4193};
   4194
   4195static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
   4196	.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
   4197	.set_safe_mode = gfx_v7_0_set_safe_mode,
   4198	.unset_safe_mode = gfx_v7_0_unset_safe_mode,
   4199	.init = gfx_v7_0_rlc_init,
   4200	.get_csb_size = gfx_v7_0_get_csb_size,
   4201	.get_csb_buffer = gfx_v7_0_get_csb_buffer,
   4202	.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
   4203	.resume = gfx_v7_0_rlc_resume,
   4204	.stop = gfx_v7_0_rlc_stop,
   4205	.reset = gfx_v7_0_rlc_reset,
   4206	.start = gfx_v7_0_rlc_start,
   4207	.update_spm_vmid = gfx_v7_0_update_spm_vmid
   4208};
   4209
   4210static int gfx_v7_0_early_init(void *handle)
   4211{
   4212	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4213
   4214	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
   4215	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
   4216					  AMDGPU_MAX_COMPUTE_RINGS);
   4217	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
   4218	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
   4219	gfx_v7_0_set_ring_funcs(adev);
   4220	gfx_v7_0_set_irq_funcs(adev);
   4221	gfx_v7_0_set_gds_init(adev);
   4222
   4223	return 0;
   4224}
   4225
   4226static int gfx_v7_0_late_init(void *handle)
   4227{
   4228	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4229	int r;
   4230
   4231	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
   4232	if (r)
   4233		return r;
   4234
   4235	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
   4236	if (r)
   4237		return r;
   4238
   4239	return 0;
   4240}
   4241
   4242static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
   4243{
   4244	u32 gb_addr_config;
   4245	u32 mc_arb_ramcfg;
   4246	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
   4247	u32 tmp;
   4248
   4249	switch (adev->asic_type) {
   4250	case CHIP_BONAIRE:
   4251		adev->gfx.config.max_shader_engines = 2;
   4252		adev->gfx.config.max_tile_pipes = 4;
   4253		adev->gfx.config.max_cu_per_sh = 7;
   4254		adev->gfx.config.max_sh_per_se = 1;
   4255		adev->gfx.config.max_backends_per_se = 2;
   4256		adev->gfx.config.max_texture_channel_caches = 4;
   4257		adev->gfx.config.max_gprs = 256;
   4258		adev->gfx.config.max_gs_threads = 32;
   4259		adev->gfx.config.max_hw_contexts = 8;
   4260
   4261		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   4262		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   4263		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   4264		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   4265		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
   4266		break;
   4267	case CHIP_HAWAII:
   4268		adev->gfx.config.max_shader_engines = 4;
   4269		adev->gfx.config.max_tile_pipes = 16;
   4270		adev->gfx.config.max_cu_per_sh = 11;
   4271		adev->gfx.config.max_sh_per_se = 1;
   4272		adev->gfx.config.max_backends_per_se = 4;
   4273		adev->gfx.config.max_texture_channel_caches = 16;
   4274		adev->gfx.config.max_gprs = 256;
   4275		adev->gfx.config.max_gs_threads = 32;
   4276		adev->gfx.config.max_hw_contexts = 8;
   4277
   4278		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   4279		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   4280		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   4281		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   4282		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
   4283		break;
   4284	case CHIP_KAVERI:
   4285		adev->gfx.config.max_shader_engines = 1;
   4286		adev->gfx.config.max_tile_pipes = 4;
   4287		adev->gfx.config.max_cu_per_sh = 8;
   4288		adev->gfx.config.max_backends_per_se = 2;
   4289		adev->gfx.config.max_sh_per_se = 1;
   4290		adev->gfx.config.max_texture_channel_caches = 4;
   4291		adev->gfx.config.max_gprs = 256;
   4292		adev->gfx.config.max_gs_threads = 16;
   4293		adev->gfx.config.max_hw_contexts = 8;
   4294
   4295		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   4296		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   4297		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   4298		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   4299		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
   4300		break;
   4301	case CHIP_KABINI:
   4302	case CHIP_MULLINS:
   4303	default:
   4304		adev->gfx.config.max_shader_engines = 1;
   4305		adev->gfx.config.max_tile_pipes = 2;
   4306		adev->gfx.config.max_cu_per_sh = 2;
   4307		adev->gfx.config.max_sh_per_se = 1;
   4308		adev->gfx.config.max_backends_per_se = 1;
   4309		adev->gfx.config.max_texture_channel_caches = 2;
   4310		adev->gfx.config.max_gprs = 256;
   4311		adev->gfx.config.max_gs_threads = 16;
   4312		adev->gfx.config.max_hw_contexts = 8;
   4313
   4314		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
   4315		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
   4316		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
   4317		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
   4318		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
   4319		break;
   4320	}
   4321
   4322	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
   4323	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
   4324
   4325	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
   4326				MC_ARB_RAMCFG, NOOFBANK);
   4327	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
   4328				MC_ARB_RAMCFG, NOOFRANKS);
   4329
   4330	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
   4331	adev->gfx.config.mem_max_burst_length_bytes = 256;
   4332	if (adev->flags & AMD_IS_APU) {
   4333		/* Get memory bank mapping mode. */
   4334		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
   4335		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
   4336		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
   4337
   4338		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
   4339		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
   4340		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
   4341
   4342		/* Validate settings in case only one DIMM installed. */
   4343		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
   4344			dimm00_addr_map = 0;
   4345		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
   4346			dimm01_addr_map = 0;
   4347		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
   4348			dimm10_addr_map = 0;
   4349		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
   4350			dimm11_addr_map = 0;
   4351
   4352		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
   4353		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
   4354		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
   4355			adev->gfx.config.mem_row_size_in_kb = 2;
   4356		else
   4357			adev->gfx.config.mem_row_size_in_kb = 1;
   4358	} else {
   4359		tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
   4360		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
   4361		if (adev->gfx.config.mem_row_size_in_kb > 4)
   4362			adev->gfx.config.mem_row_size_in_kb = 4;
   4363	}
   4364	/* XXX use MC settings? */
   4365	adev->gfx.config.shader_engine_tile_size = 32;
   4366	adev->gfx.config.num_gpus = 1;
   4367	adev->gfx.config.multi_gpu_tile_size = 64;
   4368
   4369	/* fix up row size */
   4370	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
   4371	switch (adev->gfx.config.mem_row_size_in_kb) {
   4372	case 1:
   4373	default:
   4374		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
   4375		break;
   4376	case 2:
   4377		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
   4378		break;
   4379	case 4:
   4380		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
   4381		break;
   4382	}
   4383	adev->gfx.config.gb_addr_config = gb_addr_config;
   4384}
   4385
   4386static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
   4387					int mec, int pipe, int queue)
   4388{
   4389	int r;
   4390	unsigned irq_type;
   4391	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
   4392
   4393	/* mec0 is me1 */
   4394	ring->me = mec + 1;
   4395	ring->pipe = pipe;
   4396	ring->queue = queue;
   4397
   4398	ring->ring_obj = NULL;
   4399	ring->use_doorbell = true;
   4400	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
   4401	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
   4402
   4403	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
   4404		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
   4405		+ ring->pipe;
   4406
   4407	/* type-2 packets are deprecated on MEC, use type-3 instead */
   4408	r = amdgpu_ring_init(adev, ring, 1024,
   4409			     &adev->gfx.eop_irq, irq_type,
   4410			     AMDGPU_RING_PRIO_DEFAULT, NULL);
   4411	if (r)
   4412		return r;
   4413
   4414
   4415	return 0;
   4416}
   4417
   4418static int gfx_v7_0_sw_init(void *handle)
   4419{
   4420	struct amdgpu_ring *ring;
   4421	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4422	int i, j, k, r, ring_id;
   4423
   4424	switch (adev->asic_type) {
   4425	case CHIP_KAVERI:
   4426		adev->gfx.mec.num_mec = 2;
   4427		break;
   4428	case CHIP_BONAIRE:
   4429	case CHIP_HAWAII:
   4430	case CHIP_KABINI:
   4431	case CHIP_MULLINS:
   4432	default:
   4433		adev->gfx.mec.num_mec = 1;
   4434		break;
   4435	}
   4436	adev->gfx.mec.num_pipe_per_mec = 4;
   4437	adev->gfx.mec.num_queue_per_pipe = 8;
   4438
   4439	/* EOP Event */
   4440	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
   4441	if (r)
   4442		return r;
   4443
   4444	/* Privileged reg */
   4445	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
   4446			      &adev->gfx.priv_reg_irq);
   4447	if (r)
   4448		return r;
   4449
   4450	/* Privileged inst */
   4451	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
   4452			      &adev->gfx.priv_inst_irq);
   4453	if (r)
   4454		return r;
   4455
   4456	r = gfx_v7_0_init_microcode(adev);
   4457	if (r) {
   4458		DRM_ERROR("Failed to load gfx firmware!\n");
   4459		return r;
   4460	}
   4461
   4462	r = adev->gfx.rlc.funcs->init(adev);
   4463	if (r) {
   4464		DRM_ERROR("Failed to init rlc BOs!\n");
   4465		return r;
   4466	}
   4467
   4468	/* allocate mec buffers */
   4469	r = gfx_v7_0_mec_init(adev);
   4470	if (r) {
   4471		DRM_ERROR("Failed to init MEC BOs!\n");
   4472		return r;
   4473	}
   4474
   4475	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
   4476		ring = &adev->gfx.gfx_ring[i];
   4477		ring->ring_obj = NULL;
   4478		sprintf(ring->name, "gfx");
   4479		r = amdgpu_ring_init(adev, ring, 1024,
   4480				     &adev->gfx.eop_irq,
   4481				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
   4482				     AMDGPU_RING_PRIO_DEFAULT, NULL);
   4483		if (r)
   4484			return r;
   4485	}
   4486
   4487	/* set up the compute queues - allocate horizontally across pipes */
   4488	ring_id = 0;
   4489	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
   4490		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
   4491			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
   4492				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
   4493					continue;
   4494
   4495				r = gfx_v7_0_compute_ring_init(adev,
   4496								ring_id,
   4497								i, k, j);
   4498				if (r)
   4499					return r;
   4500
   4501				ring_id++;
   4502			}
   4503		}
   4504	}
   4505
   4506	adev->gfx.ce_ram_size = 0x8000;
   4507
   4508	gfx_v7_0_gpu_early_init(adev);
   4509
   4510	return r;
   4511}
   4512
   4513static int gfx_v7_0_sw_fini(void *handle)
   4514{
   4515	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4516	int i;
   4517
   4518	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   4519		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
   4520	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   4521		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
   4522
   4523	gfx_v7_0_cp_compute_fini(adev);
   4524	amdgpu_gfx_rlc_fini(adev);
   4525	gfx_v7_0_mec_fini(adev);
   4526	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
   4527				&adev->gfx.rlc.clear_state_gpu_addr,
   4528				(void **)&adev->gfx.rlc.cs_ptr);
   4529	if (adev->gfx.rlc.cp_table_size) {
   4530		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
   4531				&adev->gfx.rlc.cp_table_gpu_addr,
   4532				(void **)&adev->gfx.rlc.cp_table_ptr);
   4533	}
   4534	gfx_v7_0_free_microcode(adev);
   4535
   4536	return 0;
   4537}
   4538
   4539static int gfx_v7_0_hw_init(void *handle)
   4540{
   4541	int r;
   4542	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4543
   4544	gfx_v7_0_constants_init(adev);
   4545
   4546	/* init CSB */
   4547	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
   4548	/* init rlc */
   4549	r = adev->gfx.rlc.funcs->resume(adev);
   4550	if (r)
   4551		return r;
   4552
   4553	r = gfx_v7_0_cp_resume(adev);
   4554	if (r)
   4555		return r;
   4556
   4557	return r;
   4558}
   4559
   4560static int gfx_v7_0_hw_fini(void *handle)
   4561{
   4562	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4563
   4564	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
   4565	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
   4566	gfx_v7_0_cp_enable(adev, false);
   4567	adev->gfx.rlc.funcs->stop(adev);
   4568	gfx_v7_0_fini_pg(adev);
   4569
   4570	return 0;
   4571}
   4572
   4573static int gfx_v7_0_suspend(void *handle)
   4574{
   4575	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4576
   4577	return gfx_v7_0_hw_fini(adev);
   4578}
   4579
   4580static int gfx_v7_0_resume(void *handle)
   4581{
   4582	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4583
   4584	return gfx_v7_0_hw_init(adev);
   4585}
   4586
   4587static bool gfx_v7_0_is_idle(void *handle)
   4588{
   4589	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4590
   4591	if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
   4592		return false;
   4593	else
   4594		return true;
   4595}
   4596
   4597static int gfx_v7_0_wait_for_idle(void *handle)
   4598{
   4599	unsigned i;
   4600	u32 tmp;
   4601	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4602
   4603	for (i = 0; i < adev->usec_timeout; i++) {
   4604		/* read MC_STATUS */
   4605		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
   4606
   4607		if (!tmp)
   4608			return 0;
   4609		udelay(1);
   4610	}
   4611	return -ETIMEDOUT;
   4612}
   4613
   4614static int gfx_v7_0_soft_reset(void *handle)
   4615{
   4616	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
   4617	u32 tmp;
   4618	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4619
   4620	/* GRBM_STATUS */
   4621	tmp = RREG32(mmGRBM_STATUS);
   4622	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
   4623		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
   4624		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
   4625		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
   4626		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
   4627		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
   4628		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
   4629			GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
   4630
   4631	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
   4632		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
   4633		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
   4634	}
   4635
   4636	/* GRBM_STATUS2 */
   4637	tmp = RREG32(mmGRBM_STATUS2);
   4638	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
   4639		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
   4640
   4641	/* SRBM_STATUS */
   4642	tmp = RREG32(mmSRBM_STATUS);
   4643	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
   4644		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
   4645
   4646	if (grbm_soft_reset || srbm_soft_reset) {
   4647		/* disable CG/PG */
   4648		gfx_v7_0_fini_pg(adev);
   4649		gfx_v7_0_update_cg(adev, false);
   4650
   4651		/* stop the rlc */
   4652		adev->gfx.rlc.funcs->stop(adev);
   4653
   4654		/* Disable GFX parsing/prefetching */
   4655		WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
   4656
   4657		/* Disable MEC parsing/prefetching */
   4658		WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
   4659
   4660		if (grbm_soft_reset) {
   4661			tmp = RREG32(mmGRBM_SOFT_RESET);
   4662			tmp |= grbm_soft_reset;
   4663			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
   4664			WREG32(mmGRBM_SOFT_RESET, tmp);
   4665			tmp = RREG32(mmGRBM_SOFT_RESET);
   4666
   4667			udelay(50);
   4668
   4669			tmp &= ~grbm_soft_reset;
   4670			WREG32(mmGRBM_SOFT_RESET, tmp);
   4671			tmp = RREG32(mmGRBM_SOFT_RESET);
   4672		}
   4673
   4674		if (srbm_soft_reset) {
   4675			tmp = RREG32(mmSRBM_SOFT_RESET);
   4676			tmp |= srbm_soft_reset;
   4677			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
   4678			WREG32(mmSRBM_SOFT_RESET, tmp);
   4679			tmp = RREG32(mmSRBM_SOFT_RESET);
   4680
   4681			udelay(50);
   4682
   4683			tmp &= ~srbm_soft_reset;
   4684			WREG32(mmSRBM_SOFT_RESET, tmp);
   4685			tmp = RREG32(mmSRBM_SOFT_RESET);
   4686		}
   4687		/* Wait a little for things to settle down */
   4688		udelay(50);
   4689	}
   4690	return 0;
   4691}
   4692
   4693static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
   4694						 enum amdgpu_interrupt_state state)
   4695{
   4696	u32 cp_int_cntl;
   4697
   4698	switch (state) {
   4699	case AMDGPU_IRQ_STATE_DISABLE:
   4700		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
   4701		cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
   4702		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
   4703		break;
   4704	case AMDGPU_IRQ_STATE_ENABLE:
   4705		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
   4706		cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
   4707		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
   4708		break;
   4709	default:
   4710		break;
   4711	}
   4712}
   4713
   4714static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
   4715						     int me, int pipe,
   4716						     enum amdgpu_interrupt_state state)
   4717{
   4718	u32 mec_int_cntl, mec_int_cntl_reg;
   4719
   4720	/*
   4721	 * amdgpu controls only the first MEC. That's why this function only
   4722	 * handles the setting of interrupts for this specific MEC. All other
   4723	 * pipes' interrupts are set by amdkfd.
   4724	 */
   4725
   4726	if (me == 1) {
   4727		switch (pipe) {
   4728		case 0:
   4729			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
   4730			break;
   4731		case 1:
   4732			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
   4733			break;
   4734		case 2:
   4735			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
   4736			break;
   4737		case 3:
   4738			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
   4739			break;
   4740		default:
   4741			DRM_DEBUG("invalid pipe %d\n", pipe);
   4742			return;
   4743		}
   4744	} else {
   4745		DRM_DEBUG("invalid me %d\n", me);
   4746		return;
   4747	}
   4748
   4749	switch (state) {
   4750	case AMDGPU_IRQ_STATE_DISABLE:
   4751		mec_int_cntl = RREG32(mec_int_cntl_reg);
   4752		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
   4753		WREG32(mec_int_cntl_reg, mec_int_cntl);
   4754		break;
   4755	case AMDGPU_IRQ_STATE_ENABLE:
   4756		mec_int_cntl = RREG32(mec_int_cntl_reg);
   4757		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
   4758		WREG32(mec_int_cntl_reg, mec_int_cntl);
   4759		break;
   4760	default:
   4761		break;
   4762	}
   4763}
   4764
   4765static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
   4766					     struct amdgpu_irq_src *src,
   4767					     unsigned type,
   4768					     enum amdgpu_interrupt_state state)
   4769{
   4770	u32 cp_int_cntl;
   4771
   4772	switch (state) {
   4773	case AMDGPU_IRQ_STATE_DISABLE:
   4774		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
   4775		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
   4776		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
   4777		break;
   4778	case AMDGPU_IRQ_STATE_ENABLE:
   4779		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
   4780		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
   4781		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
   4782		break;
   4783	default:
   4784		break;
   4785	}
   4786
   4787	return 0;
   4788}
   4789
   4790static int gfx_v7_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
   4791					      struct amdgpu_irq_src *src,
   4792					      unsigned type,
   4793					      enum amdgpu_interrupt_state state)
   4794{
   4795	u32 cp_int_cntl;
   4796
   4797	switch (state) {
   4798	case AMDGPU_IRQ_STATE_DISABLE:
   4799		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
   4800		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
   4801		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
   4802		break;
   4803	case AMDGPU_IRQ_STATE_ENABLE:
   4804		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
   4805		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK;
   4806		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
   4807		break;
   4808	default:
   4809		break;
   4810	}
   4811
   4812	return 0;
   4813}
   4814
   4815static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
   4816					    struct amdgpu_irq_src *src,
   4817					    unsigned type,
   4818					    enum amdgpu_interrupt_state state)
   4819{
   4820	switch (type) {
   4821	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
   4822		gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
   4823		break;
   4824	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
   4825		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
   4826		break;
   4827	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
   4828		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
   4829		break;
   4830	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
   4831		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
   4832		break;
   4833	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
   4834		gfx_v7_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
   4835		break;
   4836	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
   4837		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
   4838		break;
   4839	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
   4840		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
   4841		break;
   4842	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
   4843		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
   4844		break;
   4845	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
   4846		gfx_v7_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
   4847		break;
   4848	default:
   4849		break;
   4850	}
   4851	return 0;
   4852}
   4853
   4854static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
   4855			    struct amdgpu_irq_src *source,
   4856			    struct amdgpu_iv_entry *entry)
   4857{
   4858	u8 me_id, pipe_id;
   4859	struct amdgpu_ring *ring;
   4860	int i;
   4861
   4862	DRM_DEBUG("IH: CP EOP\n");
   4863	me_id = (entry->ring_id & 0x0c) >> 2;
   4864	pipe_id = (entry->ring_id & 0x03) >> 0;
   4865	switch (me_id) {
   4866	case 0:
   4867		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
   4868		break;
   4869	case 1:
   4870	case 2:
   4871		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4872			ring = &adev->gfx.compute_ring[i];
   4873			if ((ring->me == me_id) && (ring->pipe == pipe_id))
   4874				amdgpu_fence_process(ring);
   4875		}
   4876		break;
   4877	}
   4878	return 0;
   4879}
   4880
   4881static void gfx_v7_0_fault(struct amdgpu_device *adev,
   4882			   struct amdgpu_iv_entry *entry)
   4883{
   4884	struct amdgpu_ring *ring;
   4885	u8 me_id, pipe_id;
   4886	int i;
   4887
   4888	me_id = (entry->ring_id & 0x0c) >> 2;
   4889	pipe_id = (entry->ring_id & 0x03) >> 0;
   4890	switch (me_id) {
   4891	case 0:
   4892		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
   4893		break;
   4894	case 1:
   4895	case 2:
   4896		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
   4897			ring = &adev->gfx.compute_ring[i];
   4898			if ((ring->me == me_id) && (ring->pipe == pipe_id))
   4899				drm_sched_fault(&ring->sched);
   4900		}
   4901		break;
   4902	}
   4903}
   4904
   4905static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
   4906				 struct amdgpu_irq_src *source,
   4907				 struct amdgpu_iv_entry *entry)
   4908{
   4909	DRM_ERROR("Illegal register access in command stream\n");
   4910	gfx_v7_0_fault(adev, entry);
   4911	return 0;
   4912}
   4913
   4914static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
   4915				  struct amdgpu_irq_src *source,
   4916				  struct amdgpu_iv_entry *entry)
   4917{
   4918	DRM_ERROR("Illegal instruction in command stream\n");
   4919	// XXX soft reset the gfx block only
   4920	gfx_v7_0_fault(adev, entry);
   4921	return 0;
   4922}
   4923
   4924static int gfx_v7_0_set_clockgating_state(void *handle,
   4925					  enum amd_clockgating_state state)
   4926{
   4927	bool gate = false;
   4928	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4929
   4930	if (state == AMD_CG_STATE_GATE)
   4931		gate = true;
   4932
   4933	gfx_v7_0_enable_gui_idle_interrupt(adev, false);
   4934	/* order matters! */
   4935	if (gate) {
   4936		gfx_v7_0_enable_mgcg(adev, true);
   4937		gfx_v7_0_enable_cgcg(adev, true);
   4938	} else {
   4939		gfx_v7_0_enable_cgcg(adev, false);
   4940		gfx_v7_0_enable_mgcg(adev, false);
   4941	}
   4942	gfx_v7_0_enable_gui_idle_interrupt(adev, true);
   4943
   4944	return 0;
   4945}
   4946
   4947static int gfx_v7_0_set_powergating_state(void *handle,
   4948					  enum amd_powergating_state state)
   4949{
   4950	bool gate = false;
   4951	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
   4952
   4953	if (state == AMD_PG_STATE_GATE)
   4954		gate = true;
   4955
   4956	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
   4957			      AMD_PG_SUPPORT_GFX_SMG |
   4958			      AMD_PG_SUPPORT_GFX_DMG |
   4959			      AMD_PG_SUPPORT_CP |
   4960			      AMD_PG_SUPPORT_GDS |
   4961			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
   4962		gfx_v7_0_update_gfx_pg(adev, gate);
   4963		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
   4964			gfx_v7_0_enable_cp_pg(adev, gate);
   4965			gfx_v7_0_enable_gds_pg(adev, gate);
   4966		}
   4967	}
   4968
   4969	return 0;
   4970}
   4971
   4972static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
   4973{
   4974	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
   4975	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
   4976			  PACKET3_TC_ACTION_ENA |
   4977			  PACKET3_SH_KCACHE_ACTION_ENA |
   4978			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
   4979	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
   4980	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
   4981	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
   4982}
   4983
   4984static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
   4985{
   4986	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
   4987	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
   4988			  PACKET3_TC_ACTION_ENA |
   4989			  PACKET3_SH_KCACHE_ACTION_ENA |
   4990			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
   4991	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
   4992	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
   4993	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
   4994	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
   4995	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
   4996}
   4997
   4998static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
   4999	.name = "gfx_v7_0",
   5000	.early_init = gfx_v7_0_early_init,
   5001	.late_init = gfx_v7_0_late_init,
   5002	.sw_init = gfx_v7_0_sw_init,
   5003	.sw_fini = gfx_v7_0_sw_fini,
   5004	.hw_init = gfx_v7_0_hw_init,
   5005	.hw_fini = gfx_v7_0_hw_fini,
   5006	.suspend = gfx_v7_0_suspend,
   5007	.resume = gfx_v7_0_resume,
   5008	.is_idle = gfx_v7_0_is_idle,
   5009	.wait_for_idle = gfx_v7_0_wait_for_idle,
   5010	.soft_reset = gfx_v7_0_soft_reset,
   5011	.set_clockgating_state = gfx_v7_0_set_clockgating_state,
   5012	.set_powergating_state = gfx_v7_0_set_powergating_state,
   5013};
   5014
   5015static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
   5016	.type = AMDGPU_RING_TYPE_GFX,
   5017	.align_mask = 0xff,
   5018	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   5019	.support_64bit_ptrs = false,
   5020	.get_rptr = gfx_v7_0_ring_get_rptr,
   5021	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
   5022	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
   5023	.emit_frame_size =
   5024		20 + /* gfx_v7_0_ring_emit_gds_switch */
   5025		7 + /* gfx_v7_0_ring_emit_hdp_flush */
   5026		5 + /* hdp invalidate */
   5027		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
   5028		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
   5029		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
   5030		3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
   5031		5, /* SURFACE_SYNC */
   5032	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
   5033	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
   5034	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
   5035	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
   5036	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
   5037	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
   5038	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
   5039	.test_ring = gfx_v7_0_ring_test_ring,
   5040	.test_ib = gfx_v7_0_ring_test_ib,
   5041	.insert_nop = amdgpu_ring_insert_nop,
   5042	.pad_ib = amdgpu_ring_generic_pad_ib,
   5043	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
   5044	.emit_wreg = gfx_v7_0_ring_emit_wreg,
   5045	.soft_recovery = gfx_v7_0_ring_soft_recovery,
   5046	.emit_mem_sync = gfx_v7_0_emit_mem_sync,
   5047};
   5048
   5049static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
   5050	.type = AMDGPU_RING_TYPE_COMPUTE,
   5051	.align_mask = 0xff,
   5052	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
   5053	.support_64bit_ptrs = false,
   5054	.get_rptr = gfx_v7_0_ring_get_rptr,
   5055	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
   5056	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
   5057	.emit_frame_size =
   5058		20 + /* gfx_v7_0_ring_emit_gds_switch */
   5059		7 + /* gfx_v7_0_ring_emit_hdp_flush */
   5060		5 + /* hdp invalidate */
   5061		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
   5062		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
   5063		7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
   5064		7, /* gfx_v7_0_emit_mem_sync_compute */
   5065	.emit_ib_size =	7, /* gfx_v7_0_ring_emit_ib_compute */
   5066	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
   5067	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
   5068	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
   5069	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
   5070	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
   5071	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
   5072	.test_ring = gfx_v7_0_ring_test_ring,
   5073	.test_ib = gfx_v7_0_ring_test_ib,
   5074	.insert_nop = amdgpu_ring_insert_nop,
   5075	.pad_ib = amdgpu_ring_generic_pad_ib,
   5076	.emit_wreg = gfx_v7_0_ring_emit_wreg,
   5077	.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
   5078};
   5079
   5080static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
   5081{
   5082	int i;
   5083
   5084	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
   5085		adev->gfx.gfx_ring[i].funcs = &gfx_v7_0_ring_funcs_gfx;
   5086	for (i = 0; i < adev->gfx.num_compute_rings; i++)
   5087		adev->gfx.compute_ring[i].funcs = &gfx_v7_0_ring_funcs_compute;
   5088}
   5089
   5090static const struct amdgpu_irq_src_funcs gfx_v7_0_eop_irq_funcs = {
   5091	.set = gfx_v7_0_set_eop_interrupt_state,
   5092	.process = gfx_v7_0_eop_irq,
   5093};
   5094
   5095static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_reg_irq_funcs = {
   5096	.set = gfx_v7_0_set_priv_reg_fault_state,
   5097	.process = gfx_v7_0_priv_reg_irq,
   5098};
   5099
   5100static const struct amdgpu_irq_src_funcs gfx_v7_0_priv_inst_irq_funcs = {
   5101	.set = gfx_v7_0_set_priv_inst_fault_state,
   5102	.process = gfx_v7_0_priv_inst_irq,
   5103};
   5104
   5105static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
   5106{
   5107	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
   5108	adev->gfx.eop_irq.funcs = &gfx_v7_0_eop_irq_funcs;
   5109
   5110	adev->gfx.priv_reg_irq.num_types = 1;
   5111	adev->gfx.priv_reg_irq.funcs = &gfx_v7_0_priv_reg_irq_funcs;
   5112
   5113	adev->gfx.priv_inst_irq.num_types = 1;
   5114	adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
   5115}
   5116
   5117static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
   5118{
   5119	/* init asci gds info */
   5120	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
   5121	adev->gds.gws_size = 64;
   5122	adev->gds.oa_size = 16;
   5123	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
   5124}
   5125
   5126
   5127static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
   5128{
   5129	int i, j, k, counter, active_cu_number = 0;
   5130	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
   5131	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
   5132	unsigned disable_masks[4 * 2];
   5133	u32 ao_cu_num;
   5134
   5135	if (adev->flags & AMD_IS_APU)
   5136		ao_cu_num = 2;
   5137	else
   5138		ao_cu_num = adev->gfx.config.max_cu_per_sh;
   5139
   5140	memset(cu_info, 0, sizeof(*cu_info));
   5141
   5142	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
   5143
   5144	mutex_lock(&adev->grbm_idx_mutex);
   5145	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
   5146		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
   5147			mask = 1;
   5148			ao_bitmap = 0;
   5149			counter = 0;
   5150			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
   5151			if (i < 4 && j < 2)
   5152				gfx_v7_0_set_user_cu_inactive_bitmap(
   5153					adev, disable_masks[i * 2 + j]);
   5154			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
   5155			cu_info->bitmap[i][j] = bitmap;
   5156
   5157			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
   5158				if (bitmap & mask) {
   5159					if (counter < ao_cu_num)
   5160						ao_bitmap |= mask;
   5161					counter ++;
   5162				}
   5163				mask <<= 1;
   5164			}
   5165			active_cu_number += counter;
   5166			if (i < 2 && j < 2)
   5167				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
   5168			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
   5169		}
   5170	}
   5171	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
   5172	mutex_unlock(&adev->grbm_idx_mutex);
   5173
   5174	cu_info->number = active_cu_number;
   5175	cu_info->ao_cu_mask = ao_cu_mask;
   5176	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
   5177	cu_info->max_waves_per_simd = 10;
   5178	cu_info->max_scratch_slots_per_cu = 32;
   5179	cu_info->wave_front_size = 64;
   5180	cu_info->lds_size = 64;
   5181}
   5182
   5183const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
   5184{
   5185	.type = AMD_IP_BLOCK_TYPE_GFX,
   5186	.major = 7,
   5187	.minor = 1,
   5188	.rev = 0,
   5189	.funcs = &gfx_v7_0_ip_funcs,
   5190};
   5191
   5192const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
   5193{
   5194	.type = AMD_IP_BLOCK_TYPE_GFX,
   5195	.major = 7,
   5196	.minor = 2,
   5197	.rev = 0,
   5198	.funcs = &gfx_v7_0_ip_funcs,
   5199};
   5200
   5201const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
   5202{
   5203	.type = AMD_IP_BLOCK_TYPE_GFX,
   5204	.major = 7,
   5205	.minor = 3,
   5206	.rev = 0,
   5207	.funcs = &gfx_v7_0_ip_funcs,
   5208};