cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

cik.c (285678B)


      1/*
      2 * Copyright 2012 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 * Authors: Alex Deucher
     23 */
     24
     25#include <linux/firmware.h>
     26#include <linux/module.h>
     27#include <linux/pci.h>
     28#include <linux/slab.h>
     29
     30#include <drm/drm_vblank.h>
     31
     32#include "atom.h"
     33#include "evergreen.h"
     34#include "cik_blit_shaders.h"
     35#include "cik.h"
     36#include "cikd.h"
     37#include "clearstate_ci.h"
     38#include "r600.h"
     39#include "radeon.h"
     40#include "radeon_asic.h"
     41#include "radeon_audio.h"
     42#include "radeon_ucode.h"
     43#include "si.h"
     44#include "vce.h"
     45
/* Default SH_MEM_CONFIG value for graphics: unaligned shader-memory
 * access mode (no alignment faulting).
 */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
     48
/*
 * Firmware images this driver may request, per CIK ASIC family.
 * Each family appears twice: once with uppercase legacy file names and
 * once with lowercase names — NOTE(review): presumably the newer
 * firmware releases; which set is preferred is decided by the ucode
 * loading code elsewhere in this file — confirm there.
 */

/* Bonaire (dGPU) */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* Hawaii (dGPU) */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* Kaveri (APU; no MC/SMC firmware) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini (APU) */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins (APU) */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
    131
/* Forward declarations for static helpers defined later in this file. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
    143/**
    144 * cik_get_allowed_info_register - fetch the register for the info ioctl
    145 *
    146 * @rdev: radeon_device pointer
    147 * @reg: register offset in bytes
    148 * @val: register value
    149 *
    150 * Returns 0 for success or -EINVAL for an invalid register
    151 *
    152 */
    153int cik_get_allowed_info_register(struct radeon_device *rdev,
    154				  u32 reg, u32 *val)
    155{
    156	switch (reg) {
    157	case GRBM_STATUS:
    158	case GRBM_STATUS2:
    159	case GRBM_STATUS_SE0:
    160	case GRBM_STATUS_SE1:
    161	case GRBM_STATUS_SE2:
    162	case GRBM_STATUS_SE3:
    163	case SRBM_STATUS:
    164	case SRBM_STATUS2:
    165	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
    166	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
    167	case UVD_STATUS:
    168	/* TODO VCE */
    169		*val = RREG32(reg);
    170		return 0;
    171	default:
    172		return -EINVAL;
    173	}
    174}
    175
    176/*
    177 * Indirect registers accessor
    178 */
    179u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
    180{
    181	unsigned long flags;
    182	u32 r;
    183
    184	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
    185	WREG32(CIK_DIDT_IND_INDEX, (reg));
    186	r = RREG32(CIK_DIDT_IND_DATA);
    187	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
    188	return r;
    189}
    190
    191void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
    192{
    193	unsigned long flags;
    194
    195	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
    196	WREG32(CIK_DIDT_IND_INDEX, (reg));
    197	WREG32(CIK_DIDT_IND_DATA, (v));
    198	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
    199}
    200
    201/* get temperature in millidegrees */
    202int ci_get_temp(struct radeon_device *rdev)
    203{
    204	u32 temp;
    205	int actual_temp = 0;
    206
    207	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
    208		CTF_TEMP_SHIFT;
    209
    210	if (temp & 0x200)
    211		actual_temp = 255;
    212	else
    213		actual_temp = temp & 0x1ff;
    214
    215	return actual_temp * 1000;
    216}
    217
    218/* get temperature in millidegrees */
    219int kv_get_temp(struct radeon_device *rdev)
    220{
    221	u32 temp;
    222	int actual_temp = 0;
    223
    224	temp = RREG32_SMC(0xC0300E0C);
    225
    226	if (temp)
    227		actual_temp = (temp / 8) - 49;
    228	else
    229		actual_temp = 0;
    230
    231	return actual_temp * 1000;
    232}
    233
    234/*
    235 * Indirect registers accessor
    236 */
    237u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
    238{
    239	unsigned long flags;
    240	u32 r;
    241
    242	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
    243	WREG32(PCIE_INDEX, reg);
    244	(void)RREG32(PCIE_INDEX);
    245	r = RREG32(PCIE_DATA);
    246	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
    247	return r;
    248}
    249
    250void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
    251{
    252	unsigned long flags;
    253
    254	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
    255	WREG32(PCIE_INDEX, reg);
    256	(void)RREG32(PCIE_INDEX);
    257	WREG32(PCIE_DATA, v);
    258	(void)RREG32(PCIE_DATA);
    259	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
    260}
    261
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * NOTE(review): the layout appears to be pairs of
 *   (instance/broadcast selector << 16) | (register dword offset),
 *   0x00000000 placeholder,
 * with occasional bare count words (0x3, 0x5) introducing runs of
 * consecutive entries. The exact format is consumed by the RLC ucode —
 * confirm against the RLC documentation before modifying.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
    708
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) parts.
 *
 * NOTE(review): same apparent format as the Spectre list above — pairs
 * of (selector << 16) | (register dword offset) followed by a
 * 0x00000000 placeholder, with bare count words (0x3, 0x5) introducing
 * runs of entries. Consumed by the RLC ucode; confirm format before
 * modifying.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
   1033
/* Bonaire SPM golden settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1038
/* Bonaire common golden settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
   1046
/* Bonaire golden register settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
   1091
/* Bonaire clock-gating init (MGCG/CGCG, per the table name): {reg offset,
 * mask, value} triples applied via radeon_program_register_sequence()
 * from cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1177
/* Spectre (Kaveri) SPM golden settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1182
/* Spectre (Kaveri) common golden settings: {reg offset, mask, value}
 * triples, applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
   1190
/* Spectre (Kaveri) golden register settings: {reg offset, mask, value}
 * triples, applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
   1219
/* Spectre (Kaveri) clock-gating init (MGCG/CGCG, per the table name):
 * {reg offset, mask, value} triples applied via
 * radeon_program_register_sequence() from cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1310
/* Kalindi (Kabini) SPM golden settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers() (also reused for Mullins).
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1315
/* Kalindi (Kabini) common golden settings: {reg offset, mask, value}
 * triples, applied via radeon_program_register_sequence() from
 * cik_init_golden_registers() (also reused for Mullins).
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
   1323
/* Kalindi (Kabini) golden register settings: {reg offset, mask, value}
 * triples, applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
   1357
/* Kalindi (Kabini) clock-gating init (MGCG/CGCG, per the table name):
 * {reg offset, mask, value} triples applied via
 * radeon_program_register_sequence() from cik_init_golden_registers()
 * (also reused for Mullins).
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1416
/* Hawaii SPM golden settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
   1421
/* Hawaii common golden settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
   1430
/* Hawaii golden register settings: {reg offset, mask, value} triples,
 * applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
   1470
/* Hawaii clock-gating init (MGCG/CGCG, per the table name): {reg offset,
 * mask, value} triples applied via radeon_program_register_sequence()
 * from cik_init_golden_registers().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
   1581
/* Godavari (Mullins) golden register settings: {reg offset, mask, value}
 * triples, applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 looks like a typo for 0x9834 (cf. the kalindi
	 * table's "0x9834, 0xf00fffff, 0x00000400" entry) — confirm against
	 * hardware docs before changing; value left as-is.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
   1617
   1618
   1619static void cik_init_golden_registers(struct radeon_device *rdev)
   1620{
   1621	switch (rdev->family) {
   1622	case CHIP_BONAIRE:
   1623		radeon_program_register_sequence(rdev,
   1624						 bonaire_mgcg_cgcg_init,
   1625						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
   1626		radeon_program_register_sequence(rdev,
   1627						 bonaire_golden_registers,
   1628						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
   1629		radeon_program_register_sequence(rdev,
   1630						 bonaire_golden_common_registers,
   1631						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
   1632		radeon_program_register_sequence(rdev,
   1633						 bonaire_golden_spm_registers,
   1634						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
   1635		break;
   1636	case CHIP_KABINI:
   1637		radeon_program_register_sequence(rdev,
   1638						 kalindi_mgcg_cgcg_init,
   1639						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
   1640		radeon_program_register_sequence(rdev,
   1641						 kalindi_golden_registers,
   1642						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
   1643		radeon_program_register_sequence(rdev,
   1644						 kalindi_golden_common_registers,
   1645						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
   1646		radeon_program_register_sequence(rdev,
   1647						 kalindi_golden_spm_registers,
   1648						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
   1649		break;
   1650	case CHIP_MULLINS:
   1651		radeon_program_register_sequence(rdev,
   1652						 kalindi_mgcg_cgcg_init,
   1653						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
   1654		radeon_program_register_sequence(rdev,
   1655						 godavari_golden_registers,
   1656						 (const u32)ARRAY_SIZE(godavari_golden_registers));
   1657		radeon_program_register_sequence(rdev,
   1658						 kalindi_golden_common_registers,
   1659						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
   1660		radeon_program_register_sequence(rdev,
   1661						 kalindi_golden_spm_registers,
   1662						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
   1663		break;
   1664	case CHIP_KAVERI:
   1665		radeon_program_register_sequence(rdev,
   1666						 spectre_mgcg_cgcg_init,
   1667						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
   1668		radeon_program_register_sequence(rdev,
   1669						 spectre_golden_registers,
   1670						 (const u32)ARRAY_SIZE(spectre_golden_registers));
   1671		radeon_program_register_sequence(rdev,
   1672						 spectre_golden_common_registers,
   1673						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
   1674		radeon_program_register_sequence(rdev,
   1675						 spectre_golden_spm_registers,
   1676						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
   1677		break;
   1678	case CHIP_HAWAII:
   1679		radeon_program_register_sequence(rdev,
   1680						 hawaii_mgcg_cgcg_init,
   1681						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
   1682		radeon_program_register_sequence(rdev,
   1683						 hawaii_golden_registers,
   1684						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
   1685		radeon_program_register_sequence(rdev,
   1686						 hawaii_golden_common_registers,
   1687						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
   1688		radeon_program_register_sequence(rdev,
   1689						 hawaii_golden_spm_registers,
   1690						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
   1691		break;
   1692	default:
   1693		break;
   1694	}
   1695}
   1696
   1697/**
   1698 * cik_get_xclk - get the xclk
   1699 *
   1700 * @rdev: radeon_device pointer
   1701 *
   1702 * Returns the reference clock used by the gfx engine
   1703 * (CIK).
   1704 */
   1705u32 cik_get_xclk(struct radeon_device *rdev)
   1706{
   1707	u32 reference_clock = rdev->clock.spll.reference_freq;
   1708
   1709	if (rdev->flags & RADEON_IS_IGP) {
   1710		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
   1711			return reference_clock / 2;
   1712	} else {
   1713		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
   1714			return reference_clock / 4;
   1715	}
   1716	return reference_clock;
   1717}
   1718
   1719/**
   1720 * cik_mm_rdoorbell - read a doorbell dword
   1721 *
   1722 * @rdev: radeon_device pointer
   1723 * @index: doorbell index
   1724 *
   1725 * Returns the value in the doorbell aperture at the
   1726 * requested doorbell index (CIK).
   1727 */
   1728u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
   1729{
   1730	if (index < rdev->doorbell.num_doorbells) {
   1731		return readl(rdev->doorbell.ptr + index);
   1732	} else {
   1733		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
   1734		return 0;
   1735	}
   1736}
   1737
   1738/**
   1739 * cik_mm_wdoorbell - write a doorbell dword
   1740 *
   1741 * @rdev: radeon_device pointer
   1742 * @index: doorbell index
   1743 * @v: value to write
   1744 *
   1745 * Writes @v to the doorbell aperture at the
   1746 * requested doorbell index (CIK).
   1747 */
   1748void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
   1749{
   1750	if (index < rdev->doorbell.num_doorbells) {
   1751		writel(v, rdev->doorbell.ptr + index);
   1752	} else {
   1753		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
   1754	}
   1755}
   1756
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC I/O debug register pairs: {index, data} written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * when no new-style firmware is available.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
   1798
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC I/O debug register pairs: {index, data} written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * when no new-style firmware is available.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
   1826
   1827
   1828/**
   1829 * cik_srbm_select - select specific register instances
   1830 *
   1831 * @rdev: radeon_device pointer
   1832 * @me: selected ME (micro engine)
   1833 * @pipe: pipe
   1834 * @queue: queue
   1835 * @vmid: VMID
   1836 *
   1837 * Switches the currently active registers instances.  Some
   1838 * registers are instanced per VMID, others are instanced per
   1839 * me/pipe/queue combination.
   1840 */
   1841static void cik_srbm_select(struct radeon_device *rdev,
   1842			    u32 me, u32 pipe, u32 queue, u32 vmid)
   1843{
   1844	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
   1845			     MEID(me & 0x3) |
   1846			     VMID(vmid & 0xf) |
   1847			     QUEUEID(queue & 0x7));
   1848	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
   1849}
   1850
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).  Handles both the new
 * (header-carrying, little-endian) firmware format and the legacy
 * (big-endian) images, selected by rdev->new_fw.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* legacy image: big-endian words */
	const __le32 *new_fw_data = NULL;	/* new image: little-endian words */
	u32 running, tmp;
	u32 *io_mc_regs = NULL;			/* legacy: built-in io reg table */
	const __le32 *new_io_mc_regs = NULL;	/* new: io reg table from fw header */
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* New format: sizes and payload offsets come from the header. */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io debug section is {index, data} u32 pairs, hence / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* Legacy format: raw ucode words, io regs come from static tables. */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* Only load if the MC sequencer is not already running. */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* NOTE(review): board-specific MC tweak keyed off PCI device
		 * 0x6649 and MC_SEQ_MISC0 bits 0x5600 — presumably a memory
		 * vendor/revision quirk; exact meaning not visible here. */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels; timeouts
		 * are not treated as errors here */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
   1956
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).  For each image the new
 * (lowercase, header-carrying) firmware name is tried first and
 * validated; on failure the legacy (uppercase) name is requested
 * and only its raw size is checked.  Mixing new and legacy images
 * is rejected.
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;		/* legacy firmware name prefix */
	const char *new_chip_name;	/* new firmware name prefix */
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;			/* count of successfully validated new images */
	int err;
	int num_fw;			/* number of images expected for this chip */
	bool new_smc = false;		/* some board revs need the "_k_smc" image */

	DRM_DEBUG("\n");

	/* Pick firmware names and expected legacy image sizes per family. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		/* APU: no MC/SMC images (see RADEON_IS_IGP check below) */
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* PFP: new name first, then legacy with a strict size check. */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME.  NOTE(review): unlike the pfp path above, a legacy size
	 * mismatch here (and in the ce/mec/rlc/sdma paths below) sets
	 * err = -EINVAL without goto out, so err is overwritten by the
	 * next request_firmware() call — presumably unintended; confirm
	 * against upstream before changing. */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC2: Kaveri only, new-format only (no legacy fallback). */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* RLC */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SDMA */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* MC: legacy fallback tries mc2 first, then mc; either
		 * expected size is accepted. */
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* SMC: optional — a missing image is logged and ignored
		 * (err reset to 0) rather than failing init. */
		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				pr_err("smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* Either all images are new-format or all are legacy; a mix
	 * cannot be handled by the hw load paths. */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		pr_err("ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* On any failure release everything; release_firmware(NULL) is a no-op. */
	if (err) {
		if (err != -EINVAL)
			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
   2305
   2306/*
   2307 * Core functions
   2308 */
   2309/**
   2310 * cik_tiling_mode_table_init - init the hw tiling table
   2311 *
   2312 * @rdev: radeon_device pointer
   2313 *
   2314 * Starting with SI, the tiling setup is done globally in a
   2315 * set of 32 tiling modes.  Rather than selecting each set of
   2316 * parameters per surface as on older asics, we just select
   2317 * which index in the tiling table we want to use, and the
   2318 * surface uses those parameters (CIK).
   2319 */
   2320static void cik_tiling_mode_table_init(struct radeon_device *rdev)
   2321{
   2322	u32 *tile = rdev->config.cik.tile_mode_array;
   2323	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
   2324	const u32 num_tile_mode_states =
   2325			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
   2326	const u32 num_secondary_tile_mode_states =
   2327			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
   2328	u32 reg_offset, split_equal_to_row_size;
   2329	u32 num_pipe_configs;
   2330	u32 num_rbs = rdev->config.cik.max_backends_per_se *
   2331		rdev->config.cik.max_shader_engines;
   2332
   2333	switch (rdev->config.cik.mem_row_size_in_kb) {
   2334	case 1:
   2335		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
   2336		break;
   2337	case 2:
   2338	default:
   2339		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
   2340		break;
   2341	case 4:
   2342		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
   2343		break;
   2344	}
   2345
   2346	num_pipe_configs = rdev->config.cik.max_tile_pipes;
   2347	if (num_pipe_configs > 8)
   2348		num_pipe_configs = 16;
   2349
   2350	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2351		tile[reg_offset] = 0;
   2352	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2353		macrotile[reg_offset] = 0;
   2354
   2355	switch(num_pipe_configs) {
   2356	case 16:
   2357		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2358			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2359			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2360			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2361		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2362			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2363			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2364			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2365		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2366			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2367			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2368			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2369		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2370			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2371			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2372			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2373		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2374			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2375			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2376			   TILE_SPLIT(split_equal_to_row_size));
   2377		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2378			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2380		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2381			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2382			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2383			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2384		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2385			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2386			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2387			   TILE_SPLIT(split_equal_to_row_size));
   2388		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2389			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
   2390		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2392			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2393		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2394			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2395			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2396			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2397		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2398			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2399			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
   2400			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2401		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2402			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2403			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2404			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2405		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2406			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2408		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2409			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2410			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2411			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2412		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2413			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2414			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
   2415			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2416		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2417			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2418			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2419			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2420		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2421			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2423		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2424			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2425			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2426			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2427		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2428			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2429			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
   2430			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2431		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2432			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2433			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
   2434			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2435
   2436		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2437			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2438			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2439			   NUM_BANKS(ADDR_SURF_16_BANK));
   2440		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2441			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2442			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2443			   NUM_BANKS(ADDR_SURF_16_BANK));
   2444		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2445			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2446			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2447			   NUM_BANKS(ADDR_SURF_16_BANK));
   2448		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2449			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2450			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2451			   NUM_BANKS(ADDR_SURF_16_BANK));
   2452		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2453			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2454			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2455			   NUM_BANKS(ADDR_SURF_8_BANK));
   2456		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2457			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2458			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2459			   NUM_BANKS(ADDR_SURF_4_BANK));
   2460		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2461			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2462			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2463			   NUM_BANKS(ADDR_SURF_2_BANK));
   2464		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2465			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2466			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2467			   NUM_BANKS(ADDR_SURF_16_BANK));
   2468		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2469			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2470			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2471			   NUM_BANKS(ADDR_SURF_16_BANK));
   2472		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2473			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2474			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2475			    NUM_BANKS(ADDR_SURF_16_BANK));
   2476		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2477			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2478			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2479			    NUM_BANKS(ADDR_SURF_8_BANK));
   2480		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2481			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2482			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2483			    NUM_BANKS(ADDR_SURF_4_BANK));
   2484		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2485			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2486			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2487			    NUM_BANKS(ADDR_SURF_2_BANK));
   2488		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2489			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2490			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2491			    NUM_BANKS(ADDR_SURF_2_BANK));
   2492
   2493		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2494			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
   2495		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2496			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
   2497		break;
   2498
   2499	case 8:
   2500		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2501			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2502			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2503			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2504		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2505			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2506			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2507			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2508		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2509			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2510			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2511			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2512		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2513			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2514			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2515			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2516		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2517			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2518			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2519			   TILE_SPLIT(split_equal_to_row_size));
   2520		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2521			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2523		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2524			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2525			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2526			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2527		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2528			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2529			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2530			   TILE_SPLIT(split_equal_to_row_size));
   2531		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2532			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
   2533		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2535			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2536		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2537			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2538			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2539			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2540		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2541			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2542			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
   2543			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2544		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2545			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2546			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2547			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2548		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2549			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2551		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2552			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2553			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2554			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2555		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2556			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2557			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
   2558			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2559		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2560			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2561			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2562			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2563		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2564			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2566		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2567			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2568			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2569			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2570		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2571			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2572			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
   2573			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2574		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2575			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2576			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
   2577			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2578
   2579		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2580				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2581				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2582				NUM_BANKS(ADDR_SURF_16_BANK));
   2583		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2584				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2585				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2586				NUM_BANKS(ADDR_SURF_16_BANK));
   2587		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2588				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2589				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2590				NUM_BANKS(ADDR_SURF_16_BANK));
   2591		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2592				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2593				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2594				NUM_BANKS(ADDR_SURF_16_BANK));
   2595		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2596				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2597				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2598				NUM_BANKS(ADDR_SURF_8_BANK));
   2599		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2600				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2601				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2602				NUM_BANKS(ADDR_SURF_4_BANK));
   2603		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2604				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2605				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2606				NUM_BANKS(ADDR_SURF_2_BANK));
   2607		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2610				NUM_BANKS(ADDR_SURF_16_BANK));
   2611		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2612				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2613				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2614				NUM_BANKS(ADDR_SURF_16_BANK));
   2615		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2616				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2617				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2618				NUM_BANKS(ADDR_SURF_16_BANK));
   2619		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2620				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2621				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2622				NUM_BANKS(ADDR_SURF_16_BANK));
   2623		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2624				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2625				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2626				NUM_BANKS(ADDR_SURF_8_BANK));
   2627		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2628				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2629				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2630				NUM_BANKS(ADDR_SURF_4_BANK));
   2631		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2634				NUM_BANKS(ADDR_SURF_2_BANK));
   2635
   2636		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2637			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
   2638		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2639			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
   2640		break;
   2641
   2642	case 4:
   2643		if (num_rbs == 4) {
   2644		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2645			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2646			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2647			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2648		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2649			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2650			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2651			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2652		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2653			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2654			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2655			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2656		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2657			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2658			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2659			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2660		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2661			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2662			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2663			   TILE_SPLIT(split_equal_to_row_size));
   2664		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2665			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2667		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2668			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2669			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2670			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2671		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2672			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2673			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2674			   TILE_SPLIT(split_equal_to_row_size));
   2675		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2676			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
   2677		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2679			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2680		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2681			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2682			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2683			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2684		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2685			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2686			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2687			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2688		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2689			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2690			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2691			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2692		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2693			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2695		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2696			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2697			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2698			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2699		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2700			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2701			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2702			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2703		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2704			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2705			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2706			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2707		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2708			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2710		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2711			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2712			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2713			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2714		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2715			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2716			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2717			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2718		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2719			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2720			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
   2721			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2722
   2723		} else if (num_rbs < 4) {
   2724		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2725			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2726			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2727			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2728		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2729			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2730			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2731			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2732		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2733			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2734			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2735			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2736		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2737			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2738			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2739			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2740		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2741			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2742			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2743			   TILE_SPLIT(split_equal_to_row_size));
   2744		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2745			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2747		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2748			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2749			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2750			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2751		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2752			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2753			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2754			   TILE_SPLIT(split_equal_to_row_size));
   2755		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2756			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
   2757		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2759			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
   2760		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2761			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2762			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2763			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2764		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2765			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2766			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2767			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2768		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2769			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2770			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2771			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2772		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2773			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2775		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2776			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2777			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2778			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2779		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2780			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2781			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2782			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2783		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2784			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2785			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2786			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2787		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2788			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
   2790		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2791			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2792			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2793			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2794		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2795			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2796			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2797			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2798		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2799			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2800			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
   2801			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2802		}
   2803
   2804		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2805				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2806				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2807				NUM_BANKS(ADDR_SURF_16_BANK));
   2808		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2809				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2810				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2811				NUM_BANKS(ADDR_SURF_16_BANK));
   2812		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2815				NUM_BANKS(ADDR_SURF_16_BANK));
   2816		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2817				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2818				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2819				NUM_BANKS(ADDR_SURF_16_BANK));
   2820		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2821				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2822				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2823				NUM_BANKS(ADDR_SURF_16_BANK));
   2824		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2825				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2826				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2827				NUM_BANKS(ADDR_SURF_8_BANK));
   2828		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2831				NUM_BANKS(ADDR_SURF_4_BANK));
   2832		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2835				NUM_BANKS(ADDR_SURF_16_BANK));
   2836		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2839				NUM_BANKS(ADDR_SURF_16_BANK));
   2840		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2841				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2842				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2843				NUM_BANKS(ADDR_SURF_16_BANK));
   2844		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2845				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2846				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2847				NUM_BANKS(ADDR_SURF_16_BANK));
   2848		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2851				NUM_BANKS(ADDR_SURF_16_BANK));
   2852		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2855				NUM_BANKS(ADDR_SURF_8_BANK));
   2856		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
   2859				NUM_BANKS(ADDR_SURF_4_BANK));
   2860
   2861		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   2862			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
   2863		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   2864			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
   2865		break;
   2866
   2867	case 2:
   2868		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2869			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2870			   PIPE_CONFIG(ADDR_SURF_P2) |
   2871			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
   2872		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2873			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2874			   PIPE_CONFIG(ADDR_SURF_P2) |
   2875			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
   2876		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2877			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2878			   PIPE_CONFIG(ADDR_SURF_P2) |
   2879			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2880		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2881			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2882			   PIPE_CONFIG(ADDR_SURF_P2) |
   2883			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
   2884		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2885			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2886			   PIPE_CONFIG(ADDR_SURF_P2) |
   2887			   TILE_SPLIT(split_equal_to_row_size));
   2888		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2889			   PIPE_CONFIG(ADDR_SURF_P2) |
   2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
   2891		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2892			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2893			   PIPE_CONFIG(ADDR_SURF_P2) |
   2894			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
   2895		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2896			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
   2897			   PIPE_CONFIG(ADDR_SURF_P2) |
   2898			   TILE_SPLIT(split_equal_to_row_size));
   2899		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
   2900			   PIPE_CONFIG(ADDR_SURF_P2);
   2901		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2902			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2903			   PIPE_CONFIG(ADDR_SURF_P2));
   2904		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2905			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2906			    PIPE_CONFIG(ADDR_SURF_P2) |
   2907			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2908		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2909			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2910			    PIPE_CONFIG(ADDR_SURF_P2) |
   2911			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2912		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2913			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
   2914			    PIPE_CONFIG(ADDR_SURF_P2) |
   2915			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2916		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2917			    PIPE_CONFIG(ADDR_SURF_P2) |
   2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
   2919		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
   2920			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2921			    PIPE_CONFIG(ADDR_SURF_P2) |
   2922			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2923		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2924			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2925			    PIPE_CONFIG(ADDR_SURF_P2) |
   2926			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2927		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2928			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
   2929			    PIPE_CONFIG(ADDR_SURF_P2) |
   2930			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2931		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
   2932			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2933			    PIPE_CONFIG(ADDR_SURF_P2));
   2934		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2935			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2936			    PIPE_CONFIG(ADDR_SURF_P2) |
   2937			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2938		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
   2939			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2940			    PIPE_CONFIG(ADDR_SURF_P2) |
   2941			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2942		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
   2943			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
   2944			    PIPE_CONFIG(ADDR_SURF_P2) |
   2945			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
   2946
   2947		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2948				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2949				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2950				NUM_BANKS(ADDR_SURF_16_BANK));
   2951		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2952				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2953				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2954				NUM_BANKS(ADDR_SURF_16_BANK));
   2955		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2956				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2957				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2958				NUM_BANKS(ADDR_SURF_16_BANK));
   2959		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2960				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2961				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2962				NUM_BANKS(ADDR_SURF_16_BANK));
   2963		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2964				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2965				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2966				NUM_BANKS(ADDR_SURF_16_BANK));
   2967		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2968				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2969				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2970				NUM_BANKS(ADDR_SURF_16_BANK));
   2971		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2972				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2973				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   2974				NUM_BANKS(ADDR_SURF_8_BANK));
   2975		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   2976				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
   2977				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2978				NUM_BANKS(ADDR_SURF_16_BANK));
   2979		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
   2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2982				NUM_BANKS(ADDR_SURF_16_BANK));
   2983		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
   2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2986				NUM_BANKS(ADDR_SURF_16_BANK));
   2987		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
   2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2990				NUM_BANKS(ADDR_SURF_16_BANK));
   2991		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2992				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
   2993				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2994				NUM_BANKS(ADDR_SURF_16_BANK));
   2995		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   2996				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   2997				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
   2998				NUM_BANKS(ADDR_SURF_16_BANK));
   2999		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
   3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
   3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
   3002				NUM_BANKS(ADDR_SURF_8_BANK));
   3003
   3004		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
   3005			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
   3006		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
   3007			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
   3008		break;
   3009
   3010	default:
   3011		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
   3012	}
   3013}
   3014
   3015/**
   3016 * cik_select_se_sh - select which SE, SH to address
   3017 *
   3018 * @rdev: radeon_device pointer
   3019 * @se_num: shader engine to address
   3020 * @sh_num: sh block to address
   3021 *
   3022 * Select which SE, SH combinations to address. Certain
   3023 * registers are instanced per SE or SH.  0xffffffff means
   3024 * broadcast to all SEs or SHs (CIK).
   3025 */
   3026static void cik_select_se_sh(struct radeon_device *rdev,
   3027			     u32 se_num, u32 sh_num)
   3028{
   3029	u32 data = INSTANCE_BROADCAST_WRITES;
   3030
   3031	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
   3032		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
   3033	else if (se_num == 0xffffffff)
   3034		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
   3035	else if (sh_num == 0xffffffff)
   3036		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
   3037	else
   3038		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
   3039	WREG32(GRBM_GFX_INDEX, data);
   3040}
   3041
   3042/**
   3043 * cik_create_bitmask - create a bitmask
   3044 *
   3045 * @bit_width: length of the mask
   3046 *
   3047 * create a variable length bit mask (CIK).
   3048 * Returns the bitmask.
   3049 */
   3050static u32 cik_create_bitmask(u32 bit_width)
   3051{
   3052	u32 i, mask = 0;
   3053
   3054	for (i = 0; i < bit_width; i++) {
   3055		mask <<= 1;
   3056		mask |= 1;
   3057	}
   3058	return mask;
   3059}
   3060
   3061/**
   3062 * cik_get_rb_disabled - computes the mask of disabled RBs
   3063 *
   3064 * @rdev: radeon_device pointer
   3065 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
   3066 * @sh_per_se: number of SH blocks per SE for the asic
   3067 *
   3068 * Calculates the bitmask of disabled RBs (CIK).
   3069 * Returns the disabled RB bitmask.
   3070 */
   3071static u32 cik_get_rb_disabled(struct radeon_device *rdev,
   3072			      u32 max_rb_num_per_se,
   3073			      u32 sh_per_se)
   3074{
   3075	u32 data, mask;
   3076
   3077	data = RREG32(CC_RB_BACKEND_DISABLE);
   3078	if (data & 1)
   3079		data &= BACKEND_DISABLE_MASK;
   3080	else
   3081		data = 0;
   3082	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
   3083
   3084	data >>= BACKEND_DISABLE_SHIFT;
   3085
   3086	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
   3087
   3088	return data & mask;
   3089}
   3090
   3091/**
   3092 * cik_setup_rb - setup the RBs on the asic
   3093 *
   3094 * @rdev: radeon_device pointer
   3095 * @se_num: number of SEs (shader engines) for the asic
   3096 * @sh_per_se: number of SH blocks per SE for the asic
   3097 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
   3098 *
   3099 * Configures per-SE/SH RB registers (CIK).
   3100 */
   3101static void cik_setup_rb(struct radeon_device *rdev,
   3102			 u32 se_num, u32 sh_per_se,
   3103			 u32 max_rb_num_per_se)
   3104{
   3105	int i, j;
   3106	u32 data, mask;
   3107	u32 disabled_rbs = 0;
   3108	u32 enabled_rbs = 0;
   3109
   3110	for (i = 0; i < se_num; i++) {
   3111		for (j = 0; j < sh_per_se; j++) {
   3112			cik_select_se_sh(rdev, i, j);
   3113			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
   3114			if (rdev->family == CHIP_HAWAII)
   3115				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
   3116			else
   3117				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
   3118		}
   3119	}
   3120	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
   3121
   3122	mask = 1;
   3123	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
   3124		if (!(disabled_rbs & mask))
   3125			enabled_rbs |= mask;
   3126		mask <<= 1;
   3127	}
   3128
   3129	rdev->config.cik.backend_enable_mask = enabled_rbs;
   3130
   3131	for (i = 0; i < se_num; i++) {
   3132		cik_select_se_sh(rdev, i, 0xffffffff);
   3133		data = 0;
   3134		for (j = 0; j < sh_per_se; j++) {
   3135			switch (enabled_rbs & 3) {
   3136			case 0:
   3137				if (j == 0)
   3138					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
   3139				else
   3140					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
   3141				break;
   3142			case 1:
   3143				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
   3144				break;
   3145			case 2:
   3146				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
   3147				break;
   3148			case 3:
   3149			default:
   3150				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
   3151				break;
   3152			}
   3153			enabled_rbs >>= 2;
   3154		}
   3155		WREG32(PA_SC_RASTER_CONFIG, data);
   3156	}
   3157	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
   3158}
   3159
   3160/**
   3161 * cik_gpu_init - setup the 3D engine
   3162 *
   3163 * @rdev: radeon_device pointer
   3164 *
   3165 * Configures the 3D engine and tiling configuration
   3166 * registers so that the 3D engine is usable.
   3167 */
   3168static void cik_gpu_init(struct radeon_device *rdev)
   3169{
   3170	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
   3171	u32 mc_arb_ramcfg;
   3172	u32 hdp_host_path_cntl;
   3173	u32 tmp;
   3174	int i, j;
   3175
   3176	switch (rdev->family) {
   3177	case CHIP_BONAIRE:
   3178		rdev->config.cik.max_shader_engines = 2;
   3179		rdev->config.cik.max_tile_pipes = 4;
   3180		rdev->config.cik.max_cu_per_sh = 7;
   3181		rdev->config.cik.max_sh_per_se = 1;
   3182		rdev->config.cik.max_backends_per_se = 2;
   3183		rdev->config.cik.max_texture_channel_caches = 4;
   3184		rdev->config.cik.max_gprs = 256;
   3185		rdev->config.cik.max_gs_threads = 32;
   3186		rdev->config.cik.max_hw_contexts = 8;
   3187
   3188		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
   3189		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
   3190		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
   3191		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
   3192		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
   3193		break;
   3194	case CHIP_HAWAII:
   3195		rdev->config.cik.max_shader_engines = 4;
   3196		rdev->config.cik.max_tile_pipes = 16;
   3197		rdev->config.cik.max_cu_per_sh = 11;
   3198		rdev->config.cik.max_sh_per_se = 1;
   3199		rdev->config.cik.max_backends_per_se = 4;
   3200		rdev->config.cik.max_texture_channel_caches = 16;
   3201		rdev->config.cik.max_gprs = 256;
   3202		rdev->config.cik.max_gs_threads = 32;
   3203		rdev->config.cik.max_hw_contexts = 8;
   3204
   3205		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
   3206		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
   3207		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
   3208		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
   3209		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
   3210		break;
   3211	case CHIP_KAVERI:
   3212		rdev->config.cik.max_shader_engines = 1;
   3213		rdev->config.cik.max_tile_pipes = 4;
   3214		rdev->config.cik.max_cu_per_sh = 8;
   3215		rdev->config.cik.max_backends_per_se = 2;
   3216		rdev->config.cik.max_sh_per_se = 1;
   3217		rdev->config.cik.max_texture_channel_caches = 4;
   3218		rdev->config.cik.max_gprs = 256;
   3219		rdev->config.cik.max_gs_threads = 16;
   3220		rdev->config.cik.max_hw_contexts = 8;
   3221
   3222		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
   3223		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
   3224		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
   3225		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
   3226		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
   3227		break;
   3228	case CHIP_KABINI:
   3229	case CHIP_MULLINS:
   3230	default:
   3231		rdev->config.cik.max_shader_engines = 1;
   3232		rdev->config.cik.max_tile_pipes = 2;
   3233		rdev->config.cik.max_cu_per_sh = 2;
   3234		rdev->config.cik.max_sh_per_se = 1;
   3235		rdev->config.cik.max_backends_per_se = 1;
   3236		rdev->config.cik.max_texture_channel_caches = 2;
   3237		rdev->config.cik.max_gprs = 256;
   3238		rdev->config.cik.max_gs_threads = 16;
   3239		rdev->config.cik.max_hw_contexts = 8;
   3240
   3241		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
   3242		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
   3243		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
   3244		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
   3245		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
   3246		break;
   3247	}
   3248
   3249	/* Initialize HDP */
   3250	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
   3251		WREG32((0x2c14 + j), 0x00000000);
   3252		WREG32((0x2c18 + j), 0x00000000);
   3253		WREG32((0x2c1c + j), 0x00000000);
   3254		WREG32((0x2c20 + j), 0x00000000);
   3255		WREG32((0x2c24 + j), 0x00000000);
   3256	}
   3257
   3258	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
   3259	WREG32(SRBM_INT_CNTL, 0x1);
   3260	WREG32(SRBM_INT_ACK, 0x1);
   3261
   3262	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
   3263
   3264	RREG32(MC_SHARED_CHMAP);
   3265	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
   3266
   3267	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
   3268	rdev->config.cik.mem_max_burst_length_bytes = 256;
   3269	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
   3270	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
   3271	if (rdev->config.cik.mem_row_size_in_kb > 4)
   3272		rdev->config.cik.mem_row_size_in_kb = 4;
   3273	/* XXX use MC settings? */
   3274	rdev->config.cik.shader_engine_tile_size = 32;
   3275	rdev->config.cik.num_gpus = 1;
   3276	rdev->config.cik.multi_gpu_tile_size = 64;
   3277
   3278	/* fix up row size */
   3279	gb_addr_config &= ~ROW_SIZE_MASK;
   3280	switch (rdev->config.cik.mem_row_size_in_kb) {
   3281	case 1:
   3282	default:
   3283		gb_addr_config |= ROW_SIZE(0);
   3284		break;
   3285	case 2:
   3286		gb_addr_config |= ROW_SIZE(1);
   3287		break;
   3288	case 4:
   3289		gb_addr_config |= ROW_SIZE(2);
   3290		break;
   3291	}
   3292
   3293	/* setup tiling info dword.  gb_addr_config is not adequate since it does
   3294	 * not have bank info, so create a custom tiling dword.
   3295	 * bits 3:0   num_pipes
   3296	 * bits 7:4   num_banks
   3297	 * bits 11:8  group_size
   3298	 * bits 15:12 row_size
   3299	 */
   3300	rdev->config.cik.tile_config = 0;
   3301	switch (rdev->config.cik.num_tile_pipes) {
   3302	case 1:
   3303		rdev->config.cik.tile_config |= (0 << 0);
   3304		break;
   3305	case 2:
   3306		rdev->config.cik.tile_config |= (1 << 0);
   3307		break;
   3308	case 4:
   3309		rdev->config.cik.tile_config |= (2 << 0);
   3310		break;
   3311	case 8:
   3312	default:
   3313		/* XXX what about 12? */
   3314		rdev->config.cik.tile_config |= (3 << 0);
   3315		break;
   3316	}
   3317	rdev->config.cik.tile_config |=
   3318		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
   3319	rdev->config.cik.tile_config |=
   3320		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
   3321	rdev->config.cik.tile_config |=
   3322		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
   3323
   3324	WREG32(GB_ADDR_CONFIG, gb_addr_config);
   3325	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
   3326	WREG32(DMIF_ADDR_CALC, gb_addr_config);
   3327	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
   3328	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
   3329	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
   3330	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
   3331	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
   3332
   3333	cik_tiling_mode_table_init(rdev);
   3334
   3335	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
   3336		     rdev->config.cik.max_sh_per_se,
   3337		     rdev->config.cik.max_backends_per_se);
   3338
   3339	rdev->config.cik.active_cus = 0;
   3340	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
   3341		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
   3342			rdev->config.cik.active_cus +=
   3343				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
   3344		}
   3345	}
   3346
   3347	/* set HW defaults for 3D engine */
   3348	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
   3349
   3350	WREG32(SX_DEBUG_1, 0x20);
   3351
   3352	WREG32(TA_CNTL_AUX, 0x00010000);
   3353
   3354	tmp = RREG32(SPI_CONFIG_CNTL);
   3355	tmp |= 0x03000000;
   3356	WREG32(SPI_CONFIG_CNTL, tmp);
   3357
   3358	WREG32(SQ_CONFIG, 1);
   3359
   3360	WREG32(DB_DEBUG, 0);
   3361
   3362	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
   3363	tmp |= 0x00000400;
   3364	WREG32(DB_DEBUG2, tmp);
   3365
   3366	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
   3367	tmp |= 0x00020200;
   3368	WREG32(DB_DEBUG3, tmp);
   3369
   3370	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
   3371	tmp |= 0x00018208;
   3372	WREG32(CB_HW_CONTROL, tmp);
   3373
   3374	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
   3375
   3376	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
   3377				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
   3378				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
   3379				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
   3380
   3381	WREG32(VGT_NUM_INSTANCES, 1);
   3382
   3383	WREG32(CP_PERFMON_CNTL, 0);
   3384
   3385	WREG32(SQ_CONFIG, 0);
   3386
   3387	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
   3388					  FORCE_EOV_MAX_REZ_CNT(255)));
   3389
   3390	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
   3391	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
   3392
   3393	WREG32(VGT_GS_VERTEX_REUSE, 16);
   3394	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
   3395
   3396	tmp = RREG32(HDP_MISC_CNTL);
   3397	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
   3398	WREG32(HDP_MISC_CNTL, tmp);
   3399
   3400	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
   3401	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
   3402
   3403	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
   3404	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
   3405
   3406	udelay(50);
   3407}
   3408
   3409/*
   3410 * GPU scratch registers helpers function.
   3411 */
   3412/**
   3413 * cik_scratch_init - setup driver info for CP scratch regs
   3414 *
   3415 * @rdev: radeon_device pointer
   3416 *
   3417 * Set up the number and offset of the CP scratch registers.
   3418 * NOTE: use of CP scratch registers is a legacy inferface and
   3419 * is not used by default on newer asics (r6xx+).  On newer asics,
   3420 * memory buffers are used for fences rather than scratch regs.
   3421 */
   3422static void cik_scratch_init(struct radeon_device *rdev)
   3423{
   3424	int i;
   3425
   3426	rdev->scratch.num_reg = 7;
   3427	rdev->scratch.reg_base = SCRATCH_REG0;
   3428	for (i = 0; i < rdev->scratch.num_reg; i++) {
   3429		rdev->scratch.free[i] = true;
   3430		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
   3431	}
   3432}
   3433
   3434/**
   3435 * cik_ring_test - basic gfx ring test
   3436 *
   3437 * @rdev: radeon_device pointer
   3438 * @ring: radeon_ring structure holding ring information
   3439 *
   3440 * Allocate a scratch register and write to it using the gfx ring (CIK).
   3441 * Provides a basic gfx ring test to verify that the ring is working.
   3442 * Used by cik_cp_gfx_resume();
   3443 * Returns 0 on success, error on failure.
   3444 */
   3445int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
   3446{
   3447	uint32_t scratch;
   3448	uint32_t tmp = 0;
   3449	unsigned i;
   3450	int r;
   3451
   3452	r = radeon_scratch_get(rdev, &scratch);
   3453	if (r) {
   3454		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
   3455		return r;
   3456	}
   3457	WREG32(scratch, 0xCAFEDEAD);
   3458	r = radeon_ring_lock(rdev, ring, 3);
   3459	if (r) {
   3460		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
   3461		radeon_scratch_free(rdev, scratch);
   3462		return r;
   3463	}
   3464	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
   3465	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
   3466	radeon_ring_write(ring, 0xDEADBEEF);
   3467	radeon_ring_unlock_commit(rdev, ring, false);
   3468
   3469	for (i = 0; i < rdev->usec_timeout; i++) {
   3470		tmp = RREG32(scratch);
   3471		if (tmp == 0xDEADBEEF)
   3472			break;
   3473		udelay(1);
   3474	}
   3475	if (i < rdev->usec_timeout) {
   3476		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
   3477	} else {
   3478		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
   3479			  ring->idx, scratch, tmp);
   3480		r = -EINVAL;
   3481	}
   3482	radeon_scratch_free(rdev, scratch);
   3483	return r;
   3484}
   3485
   3486/**
   3487 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
   3488 *
   3489 * @rdev: radeon_device pointer
   3490 * @ridx: radeon ring index
   3491 *
   3492 * Emits an hdp flush on the cp.
   3493 */
   3494static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
   3495				       int ridx)
   3496{
   3497	struct radeon_ring *ring = &rdev->ring[ridx];
   3498	u32 ref_and_mask;
   3499
   3500	switch (ring->idx) {
   3501	case CAYMAN_RING_TYPE_CP1_INDEX:
   3502	case CAYMAN_RING_TYPE_CP2_INDEX:
   3503	default:
   3504		switch (ring->me) {
   3505		case 0:
   3506			ref_and_mask = CP2 << ring->pipe;
   3507			break;
   3508		case 1:
   3509			ref_and_mask = CP6 << ring->pipe;
   3510			break;
   3511		default:
   3512			return;
   3513		}
   3514		break;
   3515	case RADEON_RING_TYPE_GFX_INDEX:
   3516		ref_and_mask = CP0;
   3517		break;
   3518	}
   3519
   3520	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   3521	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
   3522				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
   3523				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
   3524	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
   3525	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
   3526	radeon_ring_write(ring, ref_and_mask);
   3527	radeon_ring_write(ring, ref_and_mask);
   3528	radeon_ring_write(ring, 0x20); /* poll interval */
   3529}
   3530
   3531/**
   3532 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
   3533 *
   3534 * @rdev: radeon_device pointer
   3535 * @fence: radeon fence object
   3536 *
   3537 * Emits a fence sequnce number on the gfx ring and flushes
   3538 * GPU caches.
   3539 */
   3540void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
   3541			     struct radeon_fence *fence)
   3542{
   3543	struct radeon_ring *ring = &rdev->ring[fence->ring];
   3544	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
   3545
   3546	/* Workaround for cache flush problems. First send a dummy EOP
   3547	 * event down the pipe with seq one below.
   3548	 */
   3549	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
   3550	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
   3551				 EOP_TC_ACTION_EN |
   3552				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   3553				 EVENT_INDEX(5)));
   3554	radeon_ring_write(ring, addr & 0xfffffffc);
   3555	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
   3556				DATA_SEL(1) | INT_SEL(0));
   3557	radeon_ring_write(ring, fence->seq - 1);
   3558	radeon_ring_write(ring, 0);
   3559
   3560	/* Then send the real EOP event down the pipe. */
   3561	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
   3562	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
   3563				 EOP_TC_ACTION_EN |
   3564				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   3565				 EVENT_INDEX(5)));
   3566	radeon_ring_write(ring, addr & 0xfffffffc);
   3567	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
   3568	radeon_ring_write(ring, fence->seq);
   3569	radeon_ring_write(ring, 0);
   3570}
   3571
   3572/**
   3573 * cik_fence_compute_ring_emit - emit a fence on the compute ring
   3574 *
   3575 * @rdev: radeon_device pointer
   3576 * @fence: radeon fence object
   3577 *
   3578 * Emits a fence sequnce number on the compute ring and flushes
   3579 * GPU caches.
   3580 */
   3581void cik_fence_compute_ring_emit(struct radeon_device *rdev,
   3582				 struct radeon_fence *fence)
   3583{
   3584	struct radeon_ring *ring = &rdev->ring[fence->ring];
   3585	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
   3586
   3587	/* RELEASE_MEM - flush caches, send int */
   3588	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
   3589	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
   3590				 EOP_TC_ACTION_EN |
   3591				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
   3592				 EVENT_INDEX(5)));
   3593	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
   3594	radeon_ring_write(ring, addr & 0xfffffffc);
   3595	radeon_ring_write(ring, upper_32_bits(addr));
   3596	radeon_ring_write(ring, fence->seq);
   3597	radeon_ring_write(ring, 0);
   3598}
   3599
   3600/**
   3601 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
   3602 *
   3603 * @rdev: radeon_device pointer
   3604 * @ring: radeon ring buffer object
   3605 * @semaphore: radeon semaphore object
   3606 * @emit_wait: Is this a sempahore wait?
   3607 *
   3608 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
   3609 * from running ahead of semaphore waits.
   3610 */
   3611bool cik_semaphore_ring_emit(struct radeon_device *rdev,
   3612			     struct radeon_ring *ring,
   3613			     struct radeon_semaphore *semaphore,
   3614			     bool emit_wait)
   3615{
   3616	uint64_t addr = semaphore->gpu_addr;
   3617	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
   3618
   3619	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
   3620	radeon_ring_write(ring, lower_32_bits(addr));
   3621	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
   3622
   3623	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
   3624		/* Prevent the PFP from running ahead of the semaphore wait */
   3625		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   3626		radeon_ring_write(ring, 0x0);
   3627	}
   3628
   3629	return true;
   3630}
   3631
   3632/**
   3633 * cik_copy_cpdma - copy pages using the CP DMA engine
   3634 *
   3635 * @rdev: radeon_device pointer
   3636 * @src_offset: src GPU address
   3637 * @dst_offset: dst GPU address
   3638 * @num_gpu_pages: number of GPU pages to xfer
   3639 * @resv: reservation object to sync to
   3640 *
   3641 * Copy GPU paging using the CP DMA engine (CIK+).
   3642 * Used by the radeon ttm implementation to move pages if
   3643 * registered as the asic copy callback.
   3644 */
   3645struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
   3646				    uint64_t src_offset, uint64_t dst_offset,
   3647				    unsigned num_gpu_pages,
   3648				    struct dma_resv *resv)
   3649{
   3650	struct radeon_fence *fence;
   3651	struct radeon_sync sync;
   3652	int ring_index = rdev->asic->copy.blit_ring_index;
   3653	struct radeon_ring *ring = &rdev->ring[ring_index];
   3654	u32 size_in_bytes, cur_size_in_bytes, control;
   3655	int i, num_loops;
   3656	int r = 0;
   3657
   3658	radeon_sync_create(&sync);
   3659
   3660	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
   3661	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
   3662	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
   3663	if (r) {
   3664		DRM_ERROR("radeon: moving bo (%d).\n", r);
   3665		radeon_sync_free(rdev, &sync, NULL);
   3666		return ERR_PTR(r);
   3667	}
   3668
   3669	radeon_sync_resv(rdev, &sync, resv, false);
   3670	radeon_sync_rings(rdev, &sync, ring->idx);
   3671
   3672	for (i = 0; i < num_loops; i++) {
   3673		cur_size_in_bytes = size_in_bytes;
   3674		if (cur_size_in_bytes > 0x1fffff)
   3675			cur_size_in_bytes = 0x1fffff;
   3676		size_in_bytes -= cur_size_in_bytes;
   3677		control = 0;
   3678		if (size_in_bytes == 0)
   3679			control |= PACKET3_DMA_DATA_CP_SYNC;
   3680		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
   3681		radeon_ring_write(ring, control);
   3682		radeon_ring_write(ring, lower_32_bits(src_offset));
   3683		radeon_ring_write(ring, upper_32_bits(src_offset));
   3684		radeon_ring_write(ring, lower_32_bits(dst_offset));
   3685		radeon_ring_write(ring, upper_32_bits(dst_offset));
   3686		radeon_ring_write(ring, cur_size_in_bytes);
   3687		src_offset += cur_size_in_bytes;
   3688		dst_offset += cur_size_in_bytes;
   3689	}
   3690
   3691	r = radeon_fence_emit(rdev, &fence, ring->idx);
   3692	if (r) {
   3693		radeon_ring_unlock_undo(rdev, ring);
   3694		radeon_sync_free(rdev, &sync, NULL);
   3695		return ERR_PTR(r);
   3696	}
   3697
   3698	radeon_ring_unlock_commit(rdev, ring, false);
   3699	radeon_sync_free(rdev, &sync, fence);
   3700
   3701	return fence;
   3702}
   3703
   3704/*
   3705 * IB stuff
   3706 */
   3707/**
   3708 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
   3709 *
   3710 * @rdev: radeon_device pointer
   3711 * @ib: radeon indirect buffer object
   3712 *
   3713 * Emits a DE (drawing engine) or CE (constant engine) IB
   3714 * on the gfx ring.  IBs are usually generated by userspace
   3715 * acceleration drivers and submitted to the kernel for
   3716 * scheduling on the ring.  This function schedules the IB
   3717 * on the gfx ring for execution by the GPU.
   3718 */
   3719void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
   3720{
   3721	struct radeon_ring *ring = &rdev->ring[ib->ring];
   3722	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
   3723	u32 header, control = INDIRECT_BUFFER_VALID;
   3724
   3725	if (ib->is_const_ib) {
   3726		/* set switch buffer packet before const IB */
   3727		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
   3728		radeon_ring_write(ring, 0);
   3729
   3730		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
   3731	} else {
   3732		u32 next_rptr;
   3733		if (ring->rptr_save_reg) {
   3734			next_rptr = ring->wptr + 3 + 4;
   3735			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
   3736			radeon_ring_write(ring, ((ring->rptr_save_reg -
   3737						  PACKET3_SET_UCONFIG_REG_START) >> 2));
   3738			radeon_ring_write(ring, next_rptr);
   3739		} else if (rdev->wb.enabled) {
   3740			next_rptr = ring->wptr + 5 + 4;
   3741			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   3742			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
   3743			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
   3744			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
   3745			radeon_ring_write(ring, next_rptr);
   3746		}
   3747
   3748		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
   3749	}
   3750
   3751	control |= ib->length_dw | (vm_id << 24);
   3752
   3753	radeon_ring_write(ring, header);
   3754	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
   3755	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
   3756	radeon_ring_write(ring, control);
   3757}
   3758
   3759/**
   3760 * cik_ib_test - basic gfx ring IB test
   3761 *
   3762 * @rdev: radeon_device pointer
   3763 * @ring: radeon_ring structure holding ring information
   3764 *
   3765 * Allocate an IB and execute it on the gfx ring (CIK).
   3766 * Provides a basic gfx ring test to verify that IBs are working.
   3767 * Returns 0 on success, error on failure.
   3768 */
   3769int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
   3770{
   3771	struct radeon_ib ib;
   3772	uint32_t scratch;
   3773	uint32_t tmp = 0;
   3774	unsigned i;
   3775	int r;
   3776
   3777	r = radeon_scratch_get(rdev, &scratch);
   3778	if (r) {
   3779		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
   3780		return r;
   3781	}
   3782	WREG32(scratch, 0xCAFEDEAD);
   3783	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
   3784	if (r) {
   3785		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
   3786		radeon_scratch_free(rdev, scratch);
   3787		return r;
   3788	}
   3789	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
   3790	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
   3791	ib.ptr[2] = 0xDEADBEEF;
   3792	ib.length_dw = 3;
   3793	r = radeon_ib_schedule(rdev, &ib, NULL, false);
   3794	if (r) {
   3795		radeon_scratch_free(rdev, scratch);
   3796		radeon_ib_free(rdev, &ib);
   3797		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
   3798		return r;
   3799	}
   3800	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
   3801		RADEON_USEC_IB_TEST_TIMEOUT));
   3802	if (r < 0) {
   3803		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
   3804		radeon_scratch_free(rdev, scratch);
   3805		radeon_ib_free(rdev, &ib);
   3806		return r;
   3807	} else if (r == 0) {
   3808		DRM_ERROR("radeon: fence wait timed out.\n");
   3809		radeon_scratch_free(rdev, scratch);
   3810		radeon_ib_free(rdev, &ib);
   3811		return -ETIMEDOUT;
   3812	}
   3813	r = 0;
   3814	for (i = 0; i < rdev->usec_timeout; i++) {
   3815		tmp = RREG32(scratch);
   3816		if (tmp == 0xDEADBEEF)
   3817			break;
   3818		udelay(1);
   3819	}
   3820	if (i < rdev->usec_timeout) {
   3821		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
   3822	} else {
   3823		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
   3824			  scratch, tmp);
   3825		r = -EINVAL;
   3826	}
   3827	radeon_scratch_free(rdev, scratch);
   3828	radeon_ib_free(rdev, &ib);
   3829	return r;
   3830}
   3831
   3832/*
   3833 * CP.
 * On CIK, gfx and compute now have independent command processors.
   3835 *
   3836 * GFX
   3837 * Gfx consists of a single ring and can process both gfx jobs and
   3838 * compute jobs.  The gfx CP consists of three microengines (ME):
   3839 * PFP - Pre-Fetch Parser
   3840 * ME - Micro Engine
   3841 * CE - Constant Engine
   3842 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
   3844 * used by the DE so that they can be loaded into cache in parallel
   3845 * while the DE is processing state update packets.
   3846 *
   3847 * Compute
   3848 * The compute CP consists of two microengines (ME):
   3849 * MEC1 - Compute MicroEngine 1
   3850 * MEC2 - Compute MicroEngine 2
   3851 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
   3852 * The queues are exposed to userspace and are programmed directly
   3853 * by the compute runtime.
   3854 */
   3855/**
   3856 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
   3857 *
   3858 * @rdev: radeon_device pointer
   3859 * @enable: enable or disable the MEs
   3860 *
   3861 * Halts or unhalts the gfx MEs.
   3862 */
   3863static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
   3864{
   3865	if (enable)
   3866		WREG32(CP_ME_CNTL, 0);
   3867	else {
   3868		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
   3869			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
   3870		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
   3871		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
   3872	}
   3873	udelay(50);
   3874}
   3875
   3876/**
   3877 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
   3878 *
   3879 * @rdev: radeon_device pointer
   3880 *
   3881 * Loads the gfx PFP, ME, and CE ucode.
   3882 * Returns 0 for success, -EINVAL if the ucode is not available.
   3883 */
   3884static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
   3885{
   3886	int i;
   3887
   3888	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
   3889		return -EINVAL;
   3890
   3891	cik_cp_gfx_enable(rdev, false);
   3892
   3893	if (rdev->new_fw) {
   3894		const struct gfx_firmware_header_v1_0 *pfp_hdr =
   3895			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
   3896		const struct gfx_firmware_header_v1_0 *ce_hdr =
   3897			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
   3898		const struct gfx_firmware_header_v1_0 *me_hdr =
   3899			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
   3900		const __le32 *fw_data;
   3901		u32 fw_size;
   3902
   3903		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
   3904		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
   3905		radeon_ucode_print_gfx_hdr(&me_hdr->header);
   3906
   3907		/* PFP */
   3908		fw_data = (const __le32 *)
   3909			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
   3910		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
   3911		WREG32(CP_PFP_UCODE_ADDR, 0);
   3912		for (i = 0; i < fw_size; i++)
   3913			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
   3914		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
   3915
   3916		/* CE */
   3917		fw_data = (const __le32 *)
   3918			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
   3919		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
   3920		WREG32(CP_CE_UCODE_ADDR, 0);
   3921		for (i = 0; i < fw_size; i++)
   3922			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
   3923		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
   3924
   3925		/* ME */
   3926		fw_data = (const __be32 *)
   3927			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
   3928		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
   3929		WREG32(CP_ME_RAM_WADDR, 0);
   3930		for (i = 0; i < fw_size; i++)
   3931			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
   3932		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
   3933		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
   3934	} else {
   3935		const __be32 *fw_data;
   3936
   3937		/* PFP */
   3938		fw_data = (const __be32 *)rdev->pfp_fw->data;
   3939		WREG32(CP_PFP_UCODE_ADDR, 0);
   3940		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
   3941			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
   3942		WREG32(CP_PFP_UCODE_ADDR, 0);
   3943
   3944		/* CE */
   3945		fw_data = (const __be32 *)rdev->ce_fw->data;
   3946		WREG32(CP_CE_UCODE_ADDR, 0);
   3947		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
   3948			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
   3949		WREG32(CP_CE_UCODE_ADDR, 0);
   3950
   3951		/* ME */
   3952		fw_data = (const __be32 *)rdev->me_fw->data;
   3953		WREG32(CP_ME_RAM_WADDR, 0);
   3954		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
   3955			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
   3956		WREG32(CP_ME_RAM_WADDR, 0);
   3957	}
   3958
   3959	return 0;
   3960}
   3961
   3962/**
   3963 * cik_cp_gfx_start - start the gfx ring
   3964 *
   3965 * @rdev: radeon_device pointer
   3966 *
   3967 * Enables the ring and loads the clear state context and other
   3968 * packets required to init the ring.
   3969 * Returns 0 for success, error for failure.
   3970 */
   3971static int cik_cp_gfx_start(struct radeon_device *rdev)
   3972{
   3973	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   3974	int r, i;
   3975
   3976	/* init the CP */
   3977	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
   3978	WREG32(CP_ENDIAN_SWAP, 0);
   3979	WREG32(CP_DEVICE_ID, 1);
   3980
   3981	cik_cp_gfx_enable(rdev, true);
   3982
   3983	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
   3984	if (r) {
   3985		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
   3986		return r;
   3987	}
   3988
   3989	/* init the CE partitions.  CE only used for gfx on CIK */
   3990	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
   3991	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
   3992	radeon_ring_write(ring, 0x8000);
   3993	radeon_ring_write(ring, 0x8000);
   3994
   3995	/* setup clear context state */
   3996	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   3997	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
   3998
   3999	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
   4000	radeon_ring_write(ring, 0x80000000);
   4001	radeon_ring_write(ring, 0x80000000);
   4002
   4003	for (i = 0; i < cik_default_size; i++)
   4004		radeon_ring_write(ring, cik_default_state[i]);
   4005
   4006	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
   4007	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
   4008
   4009	/* set clear context state */
   4010	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
   4011	radeon_ring_write(ring, 0);
   4012
   4013	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
   4014	radeon_ring_write(ring, 0x00000316);
   4015	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
   4016	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
   4017
   4018	radeon_ring_unlock_commit(rdev, ring, false);
   4019
   4020	return 0;
   4021}
   4022
   4023/**
   4024 * cik_cp_gfx_fini - stop the gfx ring
   4025 *
   4026 * @rdev: radeon_device pointer
   4027 *
   4028 * Stop the gfx ring and tear down the driver ring
   4029 * info.
   4030 */
   4031static void cik_cp_gfx_fini(struct radeon_device *rdev)
   4032{
   4033	cik_cp_gfx_enable(rdev, false);
   4034	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
   4035}
   4036
   4037/**
   4038 * cik_cp_gfx_resume - setup the gfx ring buffer registers
   4039 *
   4040 * @rdev: radeon_device pointer
   4041 *
   4042 * Program the location and size of the gfx ring buffer
   4043 * and test it to make sure it's working.
   4044 * Returns 0 for success, error for failure.
   4045 */
   4046static int cik_cp_gfx_resume(struct radeon_device *rdev)
   4047{
   4048	struct radeon_ring *ring;
   4049	u32 tmp;
   4050	u32 rb_bufsz;
   4051	u64 rb_addr;
   4052	int r;
   4053
   4054	WREG32(CP_SEM_WAIT_TIMER, 0x0);
   4055	if (rdev->family != CHIP_HAWAII)
   4056		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
   4057
   4058	/* Set the write pointer delay */
   4059	WREG32(CP_RB_WPTR_DELAY, 0);
   4060
   4061	/* set the RB to use vmid 0 */
   4062	WREG32(CP_RB_VMID, 0);
   4063
   4064	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
   4065
   4066	/* ring 0 - compute and gfx */
   4067	/* Set ring buffer size */
   4068	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   4069	rb_bufsz = order_base_2(ring->ring_size / 8);
   4070	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
   4071#ifdef __BIG_ENDIAN
   4072	tmp |= BUF_SWAP_32BIT;
   4073#endif
   4074	WREG32(CP_RB0_CNTL, tmp);
   4075
   4076	/* Initialize the ring buffer's read and write pointers */
   4077	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
   4078	ring->wptr = 0;
   4079	WREG32(CP_RB0_WPTR, ring->wptr);
   4080
   4081	/* set the wb address wether it's enabled or not */
   4082	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
   4083	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
   4084
   4085	/* scratch register shadowing is no longer supported */
   4086	WREG32(SCRATCH_UMSK, 0);
   4087
   4088	if (!rdev->wb.enabled)
   4089		tmp |= RB_NO_UPDATE;
   4090
   4091	mdelay(1);
   4092	WREG32(CP_RB0_CNTL, tmp);
   4093
   4094	rb_addr = ring->gpu_addr >> 8;
   4095	WREG32(CP_RB0_BASE, rb_addr);
   4096	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
   4097
   4098	/* start the ring */
   4099	cik_cp_gfx_start(rdev);
   4100	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
   4101	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
   4102	if (r) {
   4103		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
   4104		return r;
   4105	}
   4106
   4107	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
   4108		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
   4109
   4110	return 0;
   4111}
   4112
   4113u32 cik_gfx_get_rptr(struct radeon_device *rdev,
   4114		     struct radeon_ring *ring)
   4115{
   4116	u32 rptr;
   4117
   4118	if (rdev->wb.enabled)
   4119		rptr = rdev->wb.wb[ring->rptr_offs/4];
   4120	else
   4121		rptr = RREG32(CP_RB0_RPTR);
   4122
   4123	return rptr;
   4124}
   4125
   4126u32 cik_gfx_get_wptr(struct radeon_device *rdev,
   4127		     struct radeon_ring *ring)
   4128{
   4129	return RREG32(CP_RB0_WPTR);
   4130}
   4131
/**
 * cik_gfx_set_wptr - commit the gfx ring write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Writes ring->wptr to CP_RB0_WPTR, then reads the register back
 * (presumably to flush the posted write before returning).
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
   4138
   4139u32 cik_compute_get_rptr(struct radeon_device *rdev,
   4140			 struct radeon_ring *ring)
   4141{
   4142	u32 rptr;
   4143
   4144	if (rdev->wb.enabled) {
   4145		rptr = rdev->wb.wb[ring->rptr_offs/4];
   4146	} else {
   4147		mutex_lock(&rdev->srbm_mutex);
   4148		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
   4149		rptr = RREG32(CP_HQD_PQ_RPTR);
   4150		cik_srbm_select(rdev, 0, 0, 0, 0);
   4151		mutex_unlock(&rdev->srbm_mutex);
   4152	}
   4153
   4154	return rptr;
   4155}
   4156
   4157u32 cik_compute_get_wptr(struct radeon_device *rdev,
   4158			 struct radeon_ring *ring)
   4159{
   4160	u32 wptr;
   4161
   4162	if (rdev->wb.enabled) {
   4163		/* XXX check if swapping is necessary on BE */
   4164		wptr = rdev->wb.wb[ring->wptr_offs/4];
   4165	} else {
   4166		mutex_lock(&rdev->srbm_mutex);
   4167		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
   4168		wptr = RREG32(CP_HQD_PQ_WPTR);
   4169		cik_srbm_select(rdev, 0, 0, 0, 0);
   4170		mutex_unlock(&rdev->srbm_mutex);
   4171	}
   4172
   4173	return wptr;
   4174}
   4175
/**
 * cik_compute_set_wptr - commit the compute ring write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Updates the wptr copy in the writeback buffer, then rings the
 * queue's doorbell to notify the CP.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
   4183
/**
 * cik_compute_stop - deactivate a compute queue's HQD
 *
 * @rdev: radeon_device pointer
 * @ring: compute ring whose HQD should be stopped
 *
 * Disables wptr polling, then if the HQD is active requests a
 * dequeue, waits up to rdev->usec_timeout for it to go idle, and
 * clears the queue pointers.  Selects the ring's me/pipe/queue via
 * SRBM and restores the selection to 0 before returning; the caller
 * holds rdev->srbm_mutex (see cik_cp_compute_enable()).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
   4208
   4209/**
   4210 * cik_cp_compute_enable - enable/disable the compute CP MEs
   4211 *
   4212 * @rdev: radeon_device pointer
   4213 * @enable: enable or disable the MEs
   4214 *
   4215 * Halts or unhalts the compute MEs.
   4216 */
   4217static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
   4218{
   4219	if (enable)
   4220		WREG32(CP_MEC_CNTL, 0);
   4221	else {
   4222		/*
   4223		 * To make hibernation reliable we need to clear compute ring
   4224		 * configuration before halting the compute ring.
   4225		 */
   4226		mutex_lock(&rdev->srbm_mutex);
   4227		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
   4228		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
   4229		mutex_unlock(&rdev->srbm_mutex);
   4230
   4231		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
   4232		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
   4233		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
   4234	}
   4235	udelay(50);
   4236}
   4237
   4238/**
   4239 * cik_cp_compute_load_microcode - load the compute CP ME ucode
   4240 *
   4241 * @rdev: radeon_device pointer
   4242 *
   4243 * Loads the compute MEC1&2 ucode.
   4244 * Returns 0 for success, -EINVAL if the ucode is not available.
   4245 */
   4246static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
   4247{
   4248	int i;
   4249
   4250	if (!rdev->mec_fw)
   4251		return -EINVAL;
   4252
   4253	cik_cp_compute_enable(rdev, false);
   4254
   4255	if (rdev->new_fw) {
   4256		const struct gfx_firmware_header_v1_0 *mec_hdr =
   4257			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
   4258		const __le32 *fw_data;
   4259		u32 fw_size;
   4260
   4261		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
   4262
   4263		/* MEC1 */
   4264		fw_data = (const __le32 *)
   4265			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
   4266		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
   4267		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
   4268		for (i = 0; i < fw_size; i++)
   4269			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
   4270		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
   4271
   4272		/* MEC2 */
   4273		if (rdev->family == CHIP_KAVERI) {
   4274			const struct gfx_firmware_header_v1_0 *mec2_hdr =
   4275				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
   4276
   4277			fw_data = (const __le32 *)
   4278				(rdev->mec2_fw->data +
   4279				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
   4280			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
   4281			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
   4282			for (i = 0; i < fw_size; i++)
   4283				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
   4284			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
   4285		}
   4286	} else {
   4287		const __be32 *fw_data;
   4288
   4289		/* MEC1 */
   4290		fw_data = (const __be32 *)rdev->mec_fw->data;
   4291		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
   4292		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
   4293			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
   4294		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
   4295
   4296		if (rdev->family == CHIP_KAVERI) {
   4297			/* MEC2 */
   4298			fw_data = (const __be32 *)rdev->mec_fw->data;
   4299			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
   4300			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
   4301				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
   4302			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
   4303		}
   4304	}
   4305
   4306	return 0;
   4307}
   4308
   4309/**
   4310 * cik_cp_compute_start - start the compute queues
   4311 *
   4312 * @rdev: radeon_device pointer
   4313 *
   4314 * Enable the compute queues.
   4315 * Returns 0 for success, error for failure.
   4316 */
   4317static int cik_cp_compute_start(struct radeon_device *rdev)
   4318{
   4319	cik_cp_compute_enable(rdev, true);
   4320
   4321	return 0;
   4322}
   4323
   4324/**
   4325 * cik_cp_compute_fini - stop the compute queues
   4326 *
   4327 * @rdev: radeon_device pointer
   4328 *
   4329 * Stop the compute queues and tear down the driver queue
   4330 * info.
   4331 */
   4332static void cik_cp_compute_fini(struct radeon_device *rdev)
   4333{
   4334	int i, idx, r;
   4335
   4336	cik_cp_compute_enable(rdev, false);
   4337
   4338	for (i = 0; i < 2; i++) {
   4339		if (i == 0)
   4340			idx = CAYMAN_RING_TYPE_CP1_INDEX;
   4341		else
   4342			idx = CAYMAN_RING_TYPE_CP2_INDEX;
   4343
   4344		if (rdev->ring[idx].mqd_obj) {
   4345			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
   4346			if (unlikely(r != 0))
   4347				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
   4348
   4349			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
   4350			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
   4351
   4352			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
   4353			rdev->ring[idx].mqd_obj = NULL;
   4354		}
   4355	}
   4356}
   4357
   4358static void cik_mec_fini(struct radeon_device *rdev)
   4359{
   4360	int r;
   4361
   4362	if (rdev->mec.hpd_eop_obj) {
   4363		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
   4364		if (unlikely(r != 0))
   4365			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
   4366		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
   4367		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
   4368
   4369		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
   4370		rdev->mec.hpd_eop_obj = NULL;
   4371	}
   4372}
   4373
/* Bytes of HPD EOP space per pipe (the BO below allocates 2x this per pipe). */
#define MEC_HPD_SIZE 2048

/*
 * cik_mec_init - set up MEC topology and allocate the HPD EOP buffer
 *
 * Records the number of MECs/pipes/queues for this family, then
 * creates (if needed), pins and zeroes a GTT buffer object holding
 * the EOP space for every pipe.  On any failure after creation the
 * BO is torn down via cik_mec_fini().
 * Returns 0 on success, negative error code on failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one 2*MEC_HPD_SIZE slot per pipe across all MECs */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
   4431
/*
 * CPU-side shadow of the per-queue CP_MQD_*/CP_HQD_* registers
 * programmed in cik_cp_compute_resume().  Embedded as the
 * queue_state portion of struct bonaire_mqd, which the CP fetches
 * from memory.
 * NOTE(review): layout presumably must match the hardware MQD
 * register-dump format — do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
   4470
/*
 * Memory queue descriptor (MQD) for Bonaire-class compute queues.
 * One instance lives in a per-ring GTT buffer object (see
 * cik_cp_compute_resume()); the CP reads it via CP_MQD_BASE_ADDR.
 * NOTE(review): the field layout presumably mirrors the hardware MQD
 * format — do not reorder or resize members.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* register snapshot the CP restores when activating the queue */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
   4498
   4499/**
   4500 * cik_cp_compute_resume - setup the compute queue registers
   4501 *
   4502 * @rdev: radeon_device pointer
   4503 *
   4504 * Program the compute queues and test them to make sure they
   4505 * are working.
   4506 * Returns 0 for success, error for failure.
   4507 */
   4508static int cik_cp_compute_resume(struct radeon_device *rdev)
   4509{
   4510	int r, i, j, idx;
   4511	u32 tmp;
   4512	bool use_doorbell = true;
   4513	u64 hqd_gpu_addr;
   4514	u64 mqd_gpu_addr;
   4515	u64 eop_gpu_addr;
   4516	u64 wb_gpu_addr;
   4517	u32 *buf;
   4518	struct bonaire_mqd *mqd;
   4519
   4520	r = cik_cp_compute_start(rdev);
   4521	if (r)
   4522		return r;
   4523
   4524	/* fix up chicken bits */
   4525	tmp = RREG32(CP_CPF_DEBUG);
   4526	tmp |= (1 << 23);
   4527	WREG32(CP_CPF_DEBUG, tmp);
   4528
   4529	/* init the pipes */
   4530	mutex_lock(&rdev->srbm_mutex);
   4531
   4532	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
   4533		int me = (i < 4) ? 1 : 2;
   4534		int pipe = (i < 4) ? i : (i - 4);
   4535
   4536		cik_srbm_select(rdev, me, pipe, 0, 0);
   4537
   4538		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
   4539		/* write the EOP addr */
   4540		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
   4541		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
   4542
   4543		/* set the VMID assigned */
   4544		WREG32(CP_HPD_EOP_VMID, 0);
   4545
   4546		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
   4547		tmp = RREG32(CP_HPD_EOP_CONTROL);
   4548		tmp &= ~EOP_SIZE_MASK;
   4549		tmp |= order_base_2(MEC_HPD_SIZE / 8);
   4550		WREG32(CP_HPD_EOP_CONTROL, tmp);
   4551
   4552	}
   4553	cik_srbm_select(rdev, 0, 0, 0, 0);
   4554	mutex_unlock(&rdev->srbm_mutex);
   4555
   4556	/* init the queues.  Just two for now. */
   4557	for (i = 0; i < 2; i++) {
   4558		if (i == 0)
   4559			idx = CAYMAN_RING_TYPE_CP1_INDEX;
   4560		else
   4561			idx = CAYMAN_RING_TYPE_CP2_INDEX;
   4562
   4563		if (rdev->ring[idx].mqd_obj == NULL) {
   4564			r = radeon_bo_create(rdev,
   4565					     sizeof(struct bonaire_mqd),
   4566					     PAGE_SIZE, true,
   4567					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
   4568					     NULL, &rdev->ring[idx].mqd_obj);
   4569			if (r) {
   4570				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
   4571				return r;
   4572			}
   4573		}
   4574
   4575		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
   4576		if (unlikely(r != 0)) {
   4577			cik_cp_compute_fini(rdev);
   4578			return r;
   4579		}
   4580		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
   4581				  &mqd_gpu_addr);
   4582		if (r) {
   4583			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
   4584			cik_cp_compute_fini(rdev);
   4585			return r;
   4586		}
   4587		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
   4588		if (r) {
   4589			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
   4590			cik_cp_compute_fini(rdev);
   4591			return r;
   4592		}
   4593
   4594		/* init the mqd struct */
   4595		memset(buf, 0, sizeof(struct bonaire_mqd));
   4596
   4597		mqd = (struct bonaire_mqd *)buf;
   4598		mqd->header = 0xC0310800;
   4599		mqd->static_thread_mgmt01[0] = 0xffffffff;
   4600		mqd->static_thread_mgmt01[1] = 0xffffffff;
   4601		mqd->static_thread_mgmt23[0] = 0xffffffff;
   4602		mqd->static_thread_mgmt23[1] = 0xffffffff;
   4603
   4604		mutex_lock(&rdev->srbm_mutex);
   4605		cik_srbm_select(rdev, rdev->ring[idx].me,
   4606				rdev->ring[idx].pipe,
   4607				rdev->ring[idx].queue, 0);
   4608
   4609		/* disable wptr polling */
   4610		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
   4611		tmp &= ~WPTR_POLL_EN;
   4612		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
   4613
   4614		/* enable doorbell? */
   4615		mqd->queue_state.cp_hqd_pq_doorbell_control =
   4616			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
   4617		if (use_doorbell)
   4618			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
   4619		else
   4620			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
   4621		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
   4622		       mqd->queue_state.cp_hqd_pq_doorbell_control);
   4623
   4624		/* disable the queue if it's active */
   4625		mqd->queue_state.cp_hqd_dequeue_request = 0;
   4626		mqd->queue_state.cp_hqd_pq_rptr = 0;
   4627		mqd->queue_state.cp_hqd_pq_wptr= 0;
   4628		if (RREG32(CP_HQD_ACTIVE) & 1) {
   4629			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
   4630			for (j = 0; j < rdev->usec_timeout; j++) {
   4631				if (!(RREG32(CP_HQD_ACTIVE) & 1))
   4632					break;
   4633				udelay(1);
   4634			}
   4635			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
   4636			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
   4637			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
   4638		}
   4639
   4640		/* set the pointer to the MQD */
   4641		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
   4642		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
   4643		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
   4644		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
   4645		/* set MQD vmid to 0 */
   4646		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
   4647		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
   4648		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
   4649
   4650		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
   4651		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
   4652		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
   4653		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
   4654		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
   4655		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
   4656
   4657		/* set up the HQD, this is similar to CP_RB0_CNTL */
   4658		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
   4659		mqd->queue_state.cp_hqd_pq_control &=
   4660			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
   4661
   4662		mqd->queue_state.cp_hqd_pq_control |=
   4663			order_base_2(rdev->ring[idx].ring_size / 8);
   4664		mqd->queue_state.cp_hqd_pq_control |=
   4665			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
   4666#ifdef __BIG_ENDIAN
   4667		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
   4668#endif
   4669		mqd->queue_state.cp_hqd_pq_control &=
   4670			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
   4671		mqd->queue_state.cp_hqd_pq_control |=
   4672			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
   4673		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
   4674
   4675		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
   4676		if (i == 0)
   4677			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
   4678		else
   4679			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
   4680		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
   4681		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
   4682		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
   4683		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
   4684		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
   4685
   4686		/* set the wb address wether it's enabled or not */
   4687		if (i == 0)
   4688			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
   4689		else
   4690			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
   4691		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
   4692		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
   4693			upper_32_bits(wb_gpu_addr) & 0xffff;
   4694		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
   4695		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
   4696		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
   4697		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
   4698
   4699		/* enable the doorbell if requested */
   4700		if (use_doorbell) {
   4701			mqd->queue_state.cp_hqd_pq_doorbell_control =
   4702				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
   4703			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
   4704			mqd->queue_state.cp_hqd_pq_doorbell_control |=
   4705				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
   4706			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
   4707			mqd->queue_state.cp_hqd_pq_doorbell_control &=
   4708				~(DOORBELL_SOURCE | DOORBELL_HIT);
   4709
   4710		} else {
   4711			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
   4712		}
   4713		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
   4714		       mqd->queue_state.cp_hqd_pq_doorbell_control);
   4715
   4716		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
   4717		rdev->ring[idx].wptr = 0;
   4718		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
   4719		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
   4720		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
   4721
   4722		/* set the vmid for the queue */
   4723		mqd->queue_state.cp_hqd_vmid = 0;
   4724		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
   4725
   4726		/* activate the queue */
   4727		mqd->queue_state.cp_hqd_active = 1;
   4728		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
   4729
   4730		cik_srbm_select(rdev, 0, 0, 0, 0);
   4731		mutex_unlock(&rdev->srbm_mutex);
   4732
   4733		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
   4734		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
   4735
   4736		rdev->ring[idx].ready = true;
   4737		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
   4738		if (r)
   4739			rdev->ring[idx].ready = false;
   4740	}
   4741
   4742	return 0;
   4743}
   4744
/* Enable or disable both the gfx and compute command processors together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
   4750
   4751static int cik_cp_load_microcode(struct radeon_device *rdev)
   4752{
   4753	int r;
   4754
   4755	r = cik_cp_gfx_load_microcode(rdev);
   4756	if (r)
   4757		return r;
   4758	r = cik_cp_compute_load_microcode(rdev);
   4759	if (r)
   4760		return r;
   4761
   4762	return 0;
   4763}
   4764
/* Tear down both the gfx and compute command processor state. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
   4770
/*
 * cik_cp_resume - load CP microcode and bring up the gfx/compute rings
 *
 * GUI idle interrupts are masked while the rings are (re)programmed
 * and re-enabled afterwards.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
   4792
/* Dump the GRBM/SRBM/SDMA/CP status registers to aid hang debugging. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
   4832
   4833/**
   4834 * cik_gpu_check_soft_reset - check which blocks are busy
   4835 *
   4836 * @rdev: radeon_device pointer
   4837 *
   4838 * Check which blocks are busy and return the relevant reset
   4839 * mask to be used by cik_gpu_soft_reset().
   4840 * Returns a mask of the blocks to be reset.
   4841 */
   4842u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
   4843{
   4844	u32 reset_mask = 0;
   4845	u32 tmp;
   4846
   4847	/* GRBM_STATUS */
   4848	tmp = RREG32(GRBM_STATUS);
   4849	if (tmp & (PA_BUSY | SC_BUSY |
   4850		   BCI_BUSY | SX_BUSY |
   4851		   TA_BUSY | VGT_BUSY |
   4852		   DB_BUSY | CB_BUSY |
   4853		   GDS_BUSY | SPI_BUSY |
   4854		   IA_BUSY | IA_BUSY_NO_DMA))
   4855		reset_mask |= RADEON_RESET_GFX;
   4856
   4857	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
   4858		reset_mask |= RADEON_RESET_CP;
   4859
   4860	/* GRBM_STATUS2 */
   4861	tmp = RREG32(GRBM_STATUS2);
   4862	if (tmp & RLC_BUSY)
   4863		reset_mask |= RADEON_RESET_RLC;
   4864
   4865	/* SDMA0_STATUS_REG */
   4866	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
   4867	if (!(tmp & SDMA_IDLE))
   4868		reset_mask |= RADEON_RESET_DMA;
   4869
   4870	/* SDMA1_STATUS_REG */
   4871	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
   4872	if (!(tmp & SDMA_IDLE))
   4873		reset_mask |= RADEON_RESET_DMA1;
   4874
   4875	/* SRBM_STATUS2 */
   4876	tmp = RREG32(SRBM_STATUS2);
   4877	if (tmp & SDMA_BUSY)
   4878		reset_mask |= RADEON_RESET_DMA;
   4879
   4880	if (tmp & SDMA1_BUSY)
   4881		reset_mask |= RADEON_RESET_DMA1;
   4882
   4883	/* SRBM_STATUS */
   4884	tmp = RREG32(SRBM_STATUS);
   4885
   4886	if (tmp & IH_BUSY)
   4887		reset_mask |= RADEON_RESET_IH;
   4888
   4889	if (tmp & SEM_BUSY)
   4890		reset_mask |= RADEON_RESET_SEM;
   4891
   4892	if (tmp & GRBM_RQ_PENDING)
   4893		reset_mask |= RADEON_RESET_GRBM;
   4894
   4895	if (tmp & VMC_BUSY)
   4896		reset_mask |= RADEON_RESET_VMC;
   4897
   4898	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
   4899		   MCC_BUSY | MCD_BUSY))
   4900		reset_mask |= RADEON_RESET_MC;
   4901
   4902	if (evergreen_is_display_hung(rdev))
   4903		reset_mask |= RADEON_RESET_DISPLAY;
   4904
   4905	/* Skip MC reset as it's mostly likely not hung, just busy */
   4906	if (reset_mask & RADEON_RESET_MC) {
   4907		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
   4908		reset_mask &= ~RADEON_RESET_MC;
   4909	}
   4910
   4911	return reset_mask;
   4912}
   4913
   4914/**
   4915 * cik_gpu_soft_reset - soft reset GPU
   4916 *
   4917 * @rdev: radeon_device pointer
   4918 * @reset_mask: mask of which blocks to reset
   4919 *
   4920 * Soft reset the blocks specified in @reset_mask.
   4921 */
   4922static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
   4923{
   4924	struct evergreen_mc_save save;
   4925	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
   4926	u32 tmp;
   4927
   4928	if (reset_mask == 0)
   4929		return;
   4930
   4931	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
   4932
   4933	cik_print_gpu_status_regs(rdev);
   4934	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
   4935		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
   4936	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
   4937		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
   4938
   4939	/* disable CG/PG */
   4940	cik_fini_pg(rdev);
   4941	cik_fini_cg(rdev);
   4942
   4943	/* stop the rlc */
   4944	cik_rlc_stop(rdev);
   4945
   4946	/* Disable GFX parsing/prefetching */
   4947	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
   4948
   4949	/* Disable MEC parsing/prefetching */
   4950	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
   4951
   4952	if (reset_mask & RADEON_RESET_DMA) {
   4953		/* sdma0 */
   4954		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
   4955		tmp |= SDMA_HALT;
   4956		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
   4957	}
   4958	if (reset_mask & RADEON_RESET_DMA1) {
   4959		/* sdma1 */
   4960		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
   4961		tmp |= SDMA_HALT;
   4962		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
   4963	}
   4964
   4965	evergreen_mc_stop(rdev, &save);
   4966	if (evergreen_mc_wait_for_idle(rdev)) {
   4967		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
   4968	}
   4969
   4970	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
   4971		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
   4972
   4973	if (reset_mask & RADEON_RESET_CP) {
   4974		grbm_soft_reset |= SOFT_RESET_CP;
   4975
   4976		srbm_soft_reset |= SOFT_RESET_GRBM;
   4977	}
   4978
   4979	if (reset_mask & RADEON_RESET_DMA)
   4980		srbm_soft_reset |= SOFT_RESET_SDMA;
   4981
   4982	if (reset_mask & RADEON_RESET_DMA1)
   4983		srbm_soft_reset |= SOFT_RESET_SDMA1;
   4984
   4985	if (reset_mask & RADEON_RESET_DISPLAY)
   4986		srbm_soft_reset |= SOFT_RESET_DC;
   4987
   4988	if (reset_mask & RADEON_RESET_RLC)
   4989		grbm_soft_reset |= SOFT_RESET_RLC;
   4990
   4991	if (reset_mask & RADEON_RESET_SEM)
   4992		srbm_soft_reset |= SOFT_RESET_SEM;
   4993
   4994	if (reset_mask & RADEON_RESET_IH)
   4995		srbm_soft_reset |= SOFT_RESET_IH;
   4996
   4997	if (reset_mask & RADEON_RESET_GRBM)
   4998		srbm_soft_reset |= SOFT_RESET_GRBM;
   4999
   5000	if (reset_mask & RADEON_RESET_VMC)
   5001		srbm_soft_reset |= SOFT_RESET_VMC;
   5002
   5003	if (!(rdev->flags & RADEON_IS_IGP)) {
   5004		if (reset_mask & RADEON_RESET_MC)
   5005			srbm_soft_reset |= SOFT_RESET_MC;
   5006	}
   5007
   5008	if (grbm_soft_reset) {
   5009		tmp = RREG32(GRBM_SOFT_RESET);
   5010		tmp |= grbm_soft_reset;
   5011		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
   5012		WREG32(GRBM_SOFT_RESET, tmp);
   5013		tmp = RREG32(GRBM_SOFT_RESET);
   5014
   5015		udelay(50);
   5016
   5017		tmp &= ~grbm_soft_reset;
   5018		WREG32(GRBM_SOFT_RESET, tmp);
   5019		tmp = RREG32(GRBM_SOFT_RESET);
   5020	}
   5021
   5022	if (srbm_soft_reset) {
   5023		tmp = RREG32(SRBM_SOFT_RESET);
   5024		tmp |= srbm_soft_reset;
   5025		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
   5026		WREG32(SRBM_SOFT_RESET, tmp);
   5027		tmp = RREG32(SRBM_SOFT_RESET);
   5028
   5029		udelay(50);
   5030
   5031		tmp &= ~srbm_soft_reset;
   5032		WREG32(SRBM_SOFT_RESET, tmp);
   5033		tmp = RREG32(SRBM_SOFT_RESET);
   5034	}
   5035
   5036	/* Wait a little for things to settle down */
   5037	udelay(50);
   5038
   5039	evergreen_mc_resume(rdev, &save);
   5040	udelay(50);
   5041
   5042	cik_print_gpu_status_regs(rdev);
   5043}
   5044
/* GMCON register state preserved across a KV/KB pci config reset. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
   5050
   5051static void kv_save_regs_for_reset(struct radeon_device *rdev,
   5052				   struct kv_reset_save_regs *save)
   5053{
   5054	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
   5055	save->gmcon_misc = RREG32(GMCON_MISC);
   5056	save->gmcon_misc3 = RREG32(GMCON_MISC3);
   5057
   5058	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
   5059	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
   5060						STCTRL_STUTTER_EN));
   5061}
   5062
/*
 * kv_restore_regs_for_reset - re-init GMCON PGFSM state after pci reset
 *
 * Replays a fixed sequence of GMCON_PGFSM_WRITE/GMCON_PGFSM_CONFIG
 * pairs (each followed by five padding writes), then restores the
 * GMCON registers saved by kv_save_regs_for_reset().
 * NOTE(review): the write/config values are an opaque,
 * hardware-supplied sequence — presumably power-gating state machine
 * programming; do not reorder or alter them.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* put back the state saved before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
   5135
/*
 * cik_gpu_pci_config_reset - full asic reset via pci config space
 *
 * Heavier hammer than cik_gpu_soft_reset(): quiesces CP/MEC/SDMA and
 * the RLC, stops memory traffic, saves IGP-only GMCON state, disables
 * bus mastering and triggers a pci config reset, then waits (up to
 * rdev->usec_timeout us) for the asic to come back before restoring
 * the saved GMCON state on IGPs.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* GMCON state survives only if saved/restored around the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
   5198
   5199/**
   5200 * cik_asic_reset - soft reset GPU
   5201 *
   5202 * @rdev: radeon_device pointer
   5203 * @hard: force hard reset
   5204 *
   5205 * Look up which blocks are hung and attempt
   5206 * to reset them.
   5207 * Returns 0 for success.
   5208 */
   5209int cik_asic_reset(struct radeon_device *rdev, bool hard)
   5210{
   5211	u32 reset_mask;
   5212
   5213	if (hard) {
   5214		cik_gpu_pci_config_reset(rdev);
   5215		return 0;
   5216	}
   5217
   5218	reset_mask = cik_gpu_check_soft_reset(rdev);
   5219
   5220	if (reset_mask)
   5221		r600_set_bios_scratch_engine_hung(rdev, true);
   5222
   5223	/* try soft reset */
   5224	cik_gpu_soft_reset(rdev, reset_mask);
   5225
   5226	reset_mask = cik_gpu_check_soft_reset(rdev);
   5227
   5228	/* try pci config reset */
   5229	if (reset_mask && radeon_hard_reset)
   5230		cik_gpu_pci_config_reset(rdev);
   5231
   5232	reset_mask = cik_gpu_check_soft_reset(rdev);
   5233
   5234	if (!reset_mask)
   5235		r600_set_bios_scratch_engine_hung(rdev, false);
   5236
   5237	return 0;
   5238}
   5239
   5240/**
   5241 * cik_gfx_is_lockup - check if the 3D engine is locked up
   5242 *
   5243 * @rdev: radeon_device pointer
   5244 * @ring: radeon_ring structure holding ring information
   5245 *
   5246 * Check if the 3D engine is locked up (CIK).
   5247 * Returns true if the engine is locked, false if not.
   5248 */
   5249bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
   5250{
   5251	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
   5252
   5253	if (!(reset_mask & (RADEON_RESET_GFX |
   5254			    RADEON_RESET_COMPUTE |
   5255			    RADEON_RESET_CP))) {
   5256		radeon_ring_lockup_update(rdev, ring);
   5257		return false;
   5258	}
   5259	return radeon_ring_test_lockup(rdev, ring);
   5260}
   5261
   5262/* MC */
   5263/**
   5264 * cik_mc_program - program the GPU memory controller
   5265 *
   5266 * @rdev: radeon_device pointer
   5267 *
   5268 * Set the location of vram, gart, and AGP in the GPU's
   5269 * physical address space (CIK).
   5270 */
   5271static void cik_mc_program(struct radeon_device *rdev)
   5272{
   5273	struct evergreen_mc_save save;
   5274	u32 tmp;
   5275	int i, j;
   5276
   5277	/* Initialize HDP */
   5278	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
   5279		WREG32((0x2c14 + j), 0x00000000);
   5280		WREG32((0x2c18 + j), 0x00000000);
   5281		WREG32((0x2c1c + j), 0x00000000);
   5282		WREG32((0x2c20 + j), 0x00000000);
   5283		WREG32((0x2c24 + j), 0x00000000);
   5284	}
   5285	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
   5286
   5287	evergreen_mc_stop(rdev, &save);
   5288	if (radeon_mc_wait_for_idle(rdev)) {
   5289		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
   5290	}
   5291	/* Lockout access through VGA aperture*/
   5292	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
   5293	/* Update configuration */
   5294	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
   5295	       rdev->mc.vram_start >> 12);
   5296	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
   5297	       rdev->mc.vram_end >> 12);
   5298	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
   5299	       rdev->vram_scratch.gpu_addr >> 12);
   5300	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
   5301	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
   5302	WREG32(MC_VM_FB_LOCATION, tmp);
   5303	/* XXX double check these! */
   5304	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
   5305	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
   5306	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
   5307	WREG32(MC_VM_AGP_BASE, 0);
   5308	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
   5309	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
   5310	if (radeon_mc_wait_for_idle(rdev)) {
   5311		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
   5312	}
   5313	evergreen_mc_resume(rdev, &save);
   5314	/* we need to own VRAM, so turn off the VGA renderer here
   5315	 * to stop it overwriting our objects */
   5316	rv515_vga_render_disable(rdev);
   5317}
   5318
   5319/**
   5320 * cik_mc_init - initialize the memory controller driver params
   5321 *
   5322 * @rdev: radeon_device pointer
   5323 *
   5324 * Look up the amount of vram, vram width, and decide how to place
   5325 * vram and gart within the GPU's physical address space (CIK).
   5326 * Returns 0 for success.
   5327 */
   5328static int cik_mc_init(struct radeon_device *rdev)
   5329{
   5330	u32 tmp;
   5331	int chansize, numchan;
   5332
   5333	/* Get VRAM informations */
   5334	rdev->mc.vram_is_ddr = true;
   5335	tmp = RREG32(MC_ARB_RAMCFG);
   5336	if (tmp & CHANSIZE_MASK) {
   5337		chansize = 64;
   5338	} else {
   5339		chansize = 32;
   5340	}
   5341	tmp = RREG32(MC_SHARED_CHMAP);
   5342	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
   5343	case 0:
   5344	default:
   5345		numchan = 1;
   5346		break;
   5347	case 1:
   5348		numchan = 2;
   5349		break;
   5350	case 2:
   5351		numchan = 4;
   5352		break;
   5353	case 3:
   5354		numchan = 8;
   5355		break;
   5356	case 4:
   5357		numchan = 3;
   5358		break;
   5359	case 5:
   5360		numchan = 6;
   5361		break;
   5362	case 6:
   5363		numchan = 10;
   5364		break;
   5365	case 7:
   5366		numchan = 12;
   5367		break;
   5368	case 8:
   5369		numchan = 16;
   5370		break;
   5371	}
   5372	rdev->mc.vram_width = numchan * chansize;
   5373	/* Could aper size report 0 ? */
   5374	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
   5375	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
   5376	/* size in MB on si */
   5377	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
   5378	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
   5379	rdev->mc.visible_vram_size = rdev->mc.aper_size;
   5380	si_vram_gtt_location(rdev, &rdev->mc);
   5381	radeon_update_bandwidth_info(rdev);
   5382
   5383	return 0;
   5384}
   5385
   5386/*
   5387 * GART
   5388 * VMID 0 is the physical GPU addresses as used by the kernel.
   5389 * VMIDs 1-15 are used for userspace clients and are handled
   5390 * by the radeon vm/hsa code.
   5391 */
   5392/**
   5393 * cik_pcie_gart_tlb_flush - gart tlb flush callback
   5394 *
   5395 * @rdev: radeon_device pointer
   5396 *
   5397 * Flush the TLB for the VMID 0 page table (CIK).
   5398 */
   5399void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
   5400{
   5401	/* flush hdp cache */
   5402	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
   5403
   5404	/* bits 0-15 are the VM contexts0-15 */
   5405	WREG32(VM_INVALIDATE_REQUEST, 0x1);
   5406}
   5407
   5408/**
   5409 * cik_pcie_gart_enable - gart enable
   5410 *
   5411 * @rdev: radeon_device pointer
   5412 *
   5413 * This sets up the TLBs, programs the page tables for VMID0,
   5414 * sets up the hw for VMIDs 1-15 which are allocated on
   5415 * demand, and sets up the global locations for the LDS, GDS,
   5416 * and GPUVM for FSA64 clients (CIK).
   5417 * Returns 0 for success, errors for failure.
   5418 */
   5419static int cik_pcie_gart_enable(struct radeon_device *rdev)
   5420{
   5421	int r, i;
   5422
   5423	if (rdev->gart.robj == NULL) {
   5424		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
   5425		return -EINVAL;
   5426	}
   5427	r = radeon_gart_table_vram_pin(rdev);
   5428	if (r)
   5429		return r;
   5430	/* Setup TLB control */
   5431	WREG32(MC_VM_MX_L1_TLB_CNTL,
   5432	       (0xA << 7) |
   5433	       ENABLE_L1_TLB |
   5434	       ENABLE_L1_FRAGMENT_PROCESSING |
   5435	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
   5436	       ENABLE_ADVANCED_DRIVER_MODEL |
   5437	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
   5438	/* Setup L2 cache */
   5439	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
   5440	       ENABLE_L2_FRAGMENT_PROCESSING |
   5441	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
   5442	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
   5443	       EFFECTIVE_L2_QUEUE_SIZE(7) |
   5444	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
   5445	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
   5446	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
   5447	       BANK_SELECT(4) |
   5448	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
   5449	/* setup context0 */
   5450	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
   5451	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
   5452	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
   5453	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
   5454			(u32)(rdev->dummy_page.addr >> 12));
   5455	WREG32(VM_CONTEXT0_CNTL2, 0);
   5456	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
   5457				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
   5458
   5459	WREG32(0x15D4, 0);
   5460	WREG32(0x15D8, 0);
   5461	WREG32(0x15DC, 0);
   5462
   5463	/* restore context1-15 */
   5464	/* set vm size, must be a multiple of 4 */
   5465	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
   5466	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
   5467	for (i = 1; i < 16; i++) {
   5468		if (i < 8)
   5469			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
   5470			       rdev->vm_manager.saved_table_addr[i]);
   5471		else
   5472			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
   5473			       rdev->vm_manager.saved_table_addr[i]);
   5474	}
   5475
   5476	/* enable context1-15 */
   5477	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
   5478	       (u32)(rdev->dummy_page.addr >> 12));
   5479	WREG32(VM_CONTEXT1_CNTL2, 4);
   5480	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
   5481				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
   5482				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
   5483				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
   5484				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
   5485				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
   5486				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
   5487				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
   5488				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
   5489				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
   5490				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
   5491				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
   5492				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
   5493				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
   5494
   5495	if (rdev->family == CHIP_KAVERI) {
   5496		u32 tmp = RREG32(CHUB_CONTROL);
   5497		tmp &= ~BYPASS_VM;
   5498		WREG32(CHUB_CONTROL, tmp);
   5499	}
   5500
   5501	/* XXX SH_MEM regs */
   5502	/* where to put LDS, scratch, GPUVM in FSA64 space */
   5503	mutex_lock(&rdev->srbm_mutex);
   5504	for (i = 0; i < 16; i++) {
   5505		cik_srbm_select(rdev, 0, 0, 0, i);
   5506		/* CP and shaders */
   5507		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
   5508		WREG32(SH_MEM_APE1_BASE, 1);
   5509		WREG32(SH_MEM_APE1_LIMIT, 0);
   5510		WREG32(SH_MEM_BASES, 0);
   5511		/* SDMA GFX */
   5512		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
   5513		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
   5514		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
   5515		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
   5516		/* XXX SDMA RLC - todo */
   5517	}
   5518	cik_srbm_select(rdev, 0, 0, 0, 0);
   5519	mutex_unlock(&rdev->srbm_mutex);
   5520
   5521	cik_pcie_gart_tlb_flush(rdev);
   5522	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
   5523		 (unsigned)(rdev->mc.gtt_size >> 20),
   5524		 (unsigned long long)rdev->gart.table_addr);
   5525	rdev->gart.ready = true;
   5526	return 0;
   5527}
   5528
   5529/**
   5530 * cik_pcie_gart_disable - gart disable
   5531 *
   5532 * @rdev: radeon_device pointer
   5533 *
   5534 * This disables all VM page table (CIK).
   5535 */
   5536static void cik_pcie_gart_disable(struct radeon_device *rdev)
   5537{
   5538	unsigned i;
   5539
   5540	for (i = 1; i < 16; ++i) {
   5541		uint32_t reg;
   5542		if (i < 8)
   5543			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
   5544		else
   5545			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
   5546		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
   5547	}
   5548
   5549	/* Disable all tables */
   5550	WREG32(VM_CONTEXT0_CNTL, 0);
   5551	WREG32(VM_CONTEXT1_CNTL, 0);
   5552	/* Setup TLB control */
   5553	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
   5554	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
   5555	/* Setup L2 cache */
   5556	WREG32(VM_L2_CNTL,
   5557	       ENABLE_L2_FRAGMENT_PROCESSING |
   5558	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
   5559	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
   5560	       EFFECTIVE_L2_QUEUE_SIZE(7) |
   5561	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
   5562	WREG32(VM_L2_CNTL2, 0);
   5563	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
   5564	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
   5565	radeon_gart_table_vram_unpin(rdev);
   5566}
   5567
   5568/**
   5569 * cik_pcie_gart_fini - vm fini callback
   5570 *
   5571 * @rdev: radeon_device pointer
   5572 *
   5573 * Tears down the driver GART/VM setup (CIK).
   5574 */
   5575static void cik_pcie_gart_fini(struct radeon_device *rdev)
   5576{
   5577	cik_pcie_gart_disable(rdev);
   5578	radeon_gart_table_vram_free(rdev);
   5579	radeon_gart_fini(rdev);
   5580}
   5581
   5582/* vm parser */
   5583/**
   5584 * cik_ib_parse - vm ib_parse callback
   5585 *
   5586 * @rdev: radeon_device pointer
   5587 * @ib: indirect buffer pointer
   5588 *
   5589 * CIK uses hw IB checking so this is a nop (CIK).
   5590 */
   5591int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
   5592{
   5593	return 0;
   5594}
   5595
   5596/*
   5597 * vm
   5598 * VMID 0 is the physical GPU addresses as used by the kernel.
   5599 * VMIDs 1-15 are used for userspace clients and are handled
   5600 * by the radeon vm/hsa code.
   5601 */
   5602/**
   5603 * cik_vm_init - cik vm init callback
   5604 *
   5605 * @rdev: radeon_device pointer
   5606 *
   5607 * Inits cik specific vm parameters (number of VMs, base of vram for
   5608 * VMIDs 1-15) (CIK).
   5609 * Returns 0 for success.
   5610 */
   5611int cik_vm_init(struct radeon_device *rdev)
   5612{
   5613	/*
   5614	 * number of VMs
   5615	 * VMID 0 is reserved for System
   5616	 * radeon graphics/compute will use VMIDs 1-15
   5617	 */
   5618	rdev->vm_manager.nvm = 16;
   5619	/* base offset of vram pages */
   5620	if (rdev->flags & RADEON_IS_IGP) {
   5621		u64 tmp = RREG32(MC_VM_FB_OFFSET);
   5622		tmp <<= 22;
   5623		rdev->vm_manager.vram_base_offset = tmp;
   5624	} else
   5625		rdev->vm_manager.vram_base_offset = 0;
   5626
   5627	return 0;
   5628}
   5629
   5630/**
   5631 * cik_vm_fini - cik vm fini callback
   5632 *
   5633 * @rdev: radeon_device pointer
   5634 *
   5635 * Tear down any asic specific VM setup (CIK).
   5636 */
   5637void cik_vm_fini(struct radeon_device *rdev)
   5638{
   5639}
   5640
   5641/**
   5642 * cik_vm_decode_fault - print human readable fault info
   5643 *
   5644 * @rdev: radeon_device pointer
   5645 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
   5646 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
   5647 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
   5648 *
   5649 * Print human readable fault information (CIK).
   5650 */
   5651static void cik_vm_decode_fault(struct radeon_device *rdev,
   5652				u32 status, u32 addr, u32 mc_client)
   5653{
   5654	u32 mc_id;
   5655	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
   5656	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
   5657	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
   5658		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
   5659
   5660	if (rdev->family == CHIP_HAWAII)
   5661		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
   5662	else
   5663		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
   5664
   5665	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
   5666	       protections, vmid, addr,
   5667	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
   5668	       block, mc_client, mc_id);
   5669}
   5670
   5671/*
   5672 * cik_vm_flush - cik vm flush using the CP
   5673 *
   5674 * Update the page table base and flush the VM TLB
   5675 * using the CP (CIK).
   5676 */
   5677void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
   5678		  unsigned vm_id, uint64_t pd_addr)
   5679{
   5680	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
   5681
   5682	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5683	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   5684				 WRITE_DATA_DST_SEL(0)));
   5685	if (vm_id < 8) {
   5686		radeon_ring_write(ring,
   5687				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
   5688	} else {
   5689		radeon_ring_write(ring,
   5690				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
   5691	}
   5692	radeon_ring_write(ring, 0);
   5693	radeon_ring_write(ring, pd_addr >> 12);
   5694
   5695	/* update SH_MEM_* regs */
   5696	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5697	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   5698				 WRITE_DATA_DST_SEL(0)));
   5699	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
   5700	radeon_ring_write(ring, 0);
   5701	radeon_ring_write(ring, VMID(vm_id));
   5702
   5703	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
   5704	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   5705				 WRITE_DATA_DST_SEL(0)));
   5706	radeon_ring_write(ring, SH_MEM_BASES >> 2);
   5707	radeon_ring_write(ring, 0);
   5708
   5709	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
   5710	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
   5711	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
   5712	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
   5713
   5714	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5715	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   5716				 WRITE_DATA_DST_SEL(0)));
   5717	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
   5718	radeon_ring_write(ring, 0);
   5719	radeon_ring_write(ring, VMID(0));
   5720
   5721	/* HDP flush */
   5722	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
   5723
   5724	/* bits 0-15 are the VM contexts0-15 */
   5725	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
   5726	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
   5727				 WRITE_DATA_DST_SEL(0)));
   5728	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
   5729	radeon_ring_write(ring, 0);
   5730	radeon_ring_write(ring, 1 << vm_id);
   5731
   5732	/* wait for the invalidate to complete */
   5733	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
   5734	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
   5735				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
   5736				 WAIT_REG_MEM_ENGINE(0))); /* me */
   5737	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
   5738	radeon_ring_write(ring, 0);
   5739	radeon_ring_write(ring, 0); /* ref */
   5740	radeon_ring_write(ring, 0); /* mask */
   5741	radeon_ring_write(ring, 0x20); /* poll interval */
   5742
   5743	/* compute doesn't have PFP */
   5744	if (usepfp) {
   5745		/* sync PFP to ME, otherwise we might get invalid PFP reads */
   5746		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
   5747		radeon_ring_write(ring, 0x0);
   5748	}
   5749}
   5750
   5751/*
   5752 * RLC
   5753 * The RLC is a multi-purpose microengine that handles a
   5754 * variety of functions, the most important of which is
   5755 * the interrupt controller.
   5756 */
   5757static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
   5758					  bool enable)
   5759{
   5760	u32 tmp = RREG32(CP_INT_CNTL_RING0);
   5761
   5762	if (enable)
   5763		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   5764	else
   5765		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   5766	WREG32(CP_INT_CNTL_RING0, tmp);
   5767}
   5768
   5769static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
   5770{
   5771	u32 tmp;
   5772
   5773	tmp = RREG32(RLC_LB_CNTL);
   5774	if (enable)
   5775		tmp |= LOAD_BALANCE_ENABLE;
   5776	else
   5777		tmp &= ~LOAD_BALANCE_ENABLE;
   5778	WREG32(RLC_LB_CNTL, tmp);
   5779}
   5780
   5781static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
   5782{
   5783	u32 i, j, k;
   5784	u32 mask;
   5785
   5786	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
   5787		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
   5788			cik_select_se_sh(rdev, i, j);
   5789			for (k = 0; k < rdev->usec_timeout; k++) {
   5790				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
   5791					break;
   5792				udelay(1);
   5793			}
   5794		}
   5795	}
   5796	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
   5797
   5798	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
   5799	for (k = 0; k < rdev->usec_timeout; k++) {
   5800		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
   5801			break;
   5802		udelay(1);
   5803	}
   5804}
   5805
   5806static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
   5807{
   5808	u32 tmp;
   5809
   5810	tmp = RREG32(RLC_CNTL);
   5811	if (tmp != rlc)
   5812		WREG32(RLC_CNTL, rlc);
   5813}
   5814
   5815static u32 cik_halt_rlc(struct radeon_device *rdev)
   5816{
   5817	u32 data, orig;
   5818
   5819	orig = data = RREG32(RLC_CNTL);
   5820
   5821	if (data & RLC_ENABLE) {
   5822		u32 i;
   5823
   5824		data &= ~RLC_ENABLE;
   5825		WREG32(RLC_CNTL, data);
   5826
   5827		for (i = 0; i < rdev->usec_timeout; i++) {
   5828			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
   5829				break;
   5830			udelay(1);
   5831		}
   5832
   5833		cik_wait_for_rlc_serdes(rdev);
   5834	}
   5835
   5836	return orig;
   5837}
   5838
   5839void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
   5840{
   5841	u32 tmp, i, mask;
   5842
   5843	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
   5844	WREG32(RLC_GPR_REG2, tmp);
   5845
   5846	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
   5847	for (i = 0; i < rdev->usec_timeout; i++) {
   5848		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
   5849			break;
   5850		udelay(1);
   5851	}
   5852
   5853	for (i = 0; i < rdev->usec_timeout; i++) {
   5854		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
   5855			break;
   5856		udelay(1);
   5857	}
   5858}
   5859
   5860void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
   5861{
   5862	u32 tmp;
   5863
   5864	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
   5865	WREG32(RLC_GPR_REG2, tmp);
   5866}
   5867
   5868/**
   5869 * cik_rlc_stop - stop the RLC ME
   5870 *
   5871 * @rdev: radeon_device pointer
   5872 *
   5873 * Halt the RLC ME (MicroEngine) (CIK).
   5874 */
   5875static void cik_rlc_stop(struct radeon_device *rdev)
   5876{
   5877	WREG32(RLC_CNTL, 0);
   5878
   5879	cik_enable_gui_idle_interrupt(rdev, false);
   5880
   5881	cik_wait_for_rlc_serdes(rdev);
   5882}
   5883
   5884/**
   5885 * cik_rlc_start - start the RLC ME
   5886 *
   5887 * @rdev: radeon_device pointer
   5888 *
   5889 * Unhalt the RLC ME (MicroEngine) (CIK).
   5890 */
   5891static void cik_rlc_start(struct radeon_device *rdev)
   5892{
   5893	WREG32(RLC_CNTL, RLC_ENABLE);
   5894
   5895	cik_enable_gui_idle_interrupt(rdev, true);
   5896
   5897	udelay(50);
   5898}
   5899
   5900/**
   5901 * cik_rlc_resume - setup the RLC hw
   5902 *
   5903 * @rdev: radeon_device pointer
   5904 *
   5905 * Initialize the RLC registers, load the ucode,
   5906 * and start the RLC (CIK).
   5907 * Returns 0 for success, -EINVAL if the ucode is not available.
   5908 */
   5909static int cik_rlc_resume(struct radeon_device *rdev)
   5910{
   5911	u32 i, size, tmp;
   5912
   5913	if (!rdev->rlc_fw)
   5914		return -EINVAL;
   5915
   5916	cik_rlc_stop(rdev);
   5917
   5918	/* disable CG */
   5919	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
   5920	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
   5921
   5922	si_rlc_reset(rdev);
   5923
   5924	cik_init_pg(rdev);
   5925
   5926	cik_init_cg(rdev);
   5927
   5928	WREG32(RLC_LB_CNTR_INIT, 0);
   5929	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
   5930
   5931	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
   5932	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
   5933	WREG32(RLC_LB_PARAMS, 0x00600408);
   5934	WREG32(RLC_LB_CNTL, 0x80000004);
   5935
   5936	WREG32(RLC_MC_CNTL, 0);
   5937	WREG32(RLC_UCODE_CNTL, 0);
   5938
   5939	if (rdev->new_fw) {
   5940		const struct rlc_firmware_header_v1_0 *hdr =
   5941			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
   5942		const __le32 *fw_data = (const __le32 *)
   5943			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
   5944
   5945		radeon_ucode_print_rlc_hdr(&hdr->header);
   5946
   5947		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
   5948		WREG32(RLC_GPM_UCODE_ADDR, 0);
   5949		for (i = 0; i < size; i++)
   5950			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
   5951		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
   5952	} else {
   5953		const __be32 *fw_data;
   5954
   5955		switch (rdev->family) {
   5956		case CHIP_BONAIRE:
   5957		case CHIP_HAWAII:
   5958		default:
   5959			size = BONAIRE_RLC_UCODE_SIZE;
   5960			break;
   5961		case CHIP_KAVERI:
   5962			size = KV_RLC_UCODE_SIZE;
   5963			break;
   5964		case CHIP_KABINI:
   5965			size = KB_RLC_UCODE_SIZE;
   5966			break;
   5967		case CHIP_MULLINS:
   5968			size = ML_RLC_UCODE_SIZE;
   5969			break;
   5970		}
   5971
   5972		fw_data = (const __be32 *)rdev->rlc_fw->data;
   5973		WREG32(RLC_GPM_UCODE_ADDR, 0);
   5974		for (i = 0; i < size; i++)
   5975			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
   5976		WREG32(RLC_GPM_UCODE_ADDR, 0);
   5977	}
   5978
   5979	/* XXX - find out what chips support lbpw */
   5980	cik_enable_lbpw(rdev, false);
   5981
   5982	if (rdev->family == CHIP_BONAIRE)
   5983		WREG32(RLC_DRIVER_DMA_STATUS, 0);
   5984
   5985	cik_rlc_start(rdev);
   5986
   5987	return 0;
   5988}
   5989
/* Enable/disable coarse-grain clock gating (CGCG) and
 * coarse-grain light sleep (CGLS). Requires halting the RLC
 * while the serdes write is broadcast. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC around the broadcast serdes write */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* read back four times to let outstanding traffic settle */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
   6025
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light sleep / CGTS features, gated on rdev->cg_flags. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC around the broadcast serdes write */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC around the broadcast serdes write */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
   6105
/* Memory-controller registers that carry both the clock-gating (CG)
 * and light-sleep (LS) enable bits; walked by cik_enable_mc_ls()
 * and cik_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
   6118
   6119static void cik_enable_mc_ls(struct radeon_device *rdev,
   6120			     bool enable)
   6121{
   6122	int i;
   6123	u32 orig, data;
   6124
   6125	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
   6126		orig = data = RREG32(mc_cg_registers[i]);
   6127		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
   6128			data |= MC_LS_ENABLE;
   6129		else
   6130			data &= ~MC_LS_ENABLE;
   6131		if (data != orig)
   6132			WREG32(mc_cg_registers[i], data);
   6133	}
   6134}
   6135
   6136static void cik_enable_mc_mgcg(struct radeon_device *rdev,
   6137			       bool enable)
   6138{
   6139	int i;
   6140	u32 orig, data;
   6141
   6142	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
   6143		orig = data = RREG32(mc_cg_registers[i]);
   6144		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
   6145			data |= MC_CG_ENABLE;
   6146		else
   6147			data &= ~MC_CG_ENABLE;
   6148		if (data != orig)
   6149			WREG32(mc_cg_registers[i], data);
   6150	}
   6151}
   6152
   6153static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
   6154				 bool enable)
   6155{
   6156	u32 orig, data;
   6157
   6158	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
   6159		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
   6160		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
   6161	} else {
   6162		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
   6163		data |= 0xff000000;
   6164		if (data != orig)
   6165			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
   6166
   6167		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
   6168		data |= 0xff000000;
   6169		if (data != orig)
   6170			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
   6171	}
   6172}
   6173
   6174static void cik_enable_sdma_mgls(struct radeon_device *rdev,
   6175				 bool enable)
   6176{
   6177	u32 orig, data;
   6178
   6179	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
   6180		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
   6181		data |= 0x100;
   6182		if (orig != data)
   6183			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
   6184
   6185		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
   6186		data |= 0x100;
   6187		if (orig != data)
   6188			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
   6189	} else {
   6190		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
   6191		data &= ~0x100;
   6192		if (orig != data)
   6193			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
   6194
   6195		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
   6196		data &= ~0x100;
   6197		if (orig != data)
   6198			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
   6199	}
   6200}
   6201
/* Toggle UVD medium-grain clock gating: program the low 12 bits of
 * UVD_CGC_MEM_CTRL and the DCM bit of UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value just read is discarded and 0xfff is
		 * written unconditionally — presumably intentional (the read
		 * may only serve as a posting read); confirm against UVD docs. */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the same 12 bits and the DCM enable */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
   6227
   6228static void cik_enable_bif_mgls(struct radeon_device *rdev,
   6229			       bool enable)
   6230{
   6231	u32 orig, data;
   6232
   6233	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
   6234
   6235	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
   6236		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
   6237			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
   6238	else
   6239		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
   6240			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
   6241
   6242	if (orig != data)
   6243		WREG32_PCIE_PORT(PCIE_CNTL2, data);
   6244}
   6245
   6246static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
   6247				bool enable)
   6248{
   6249	u32 orig, data;
   6250
   6251	orig = data = RREG32(HDP_HOST_PATH_CNTL);
   6252
   6253	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
   6254		data &= ~CLOCK_GATING_DIS;
   6255	else
   6256		data |= CLOCK_GATING_DIS;
   6257
   6258	if (orig != data)
   6259		WREG32(HDP_HOST_PATH_CNTL, data);
   6260}
   6261
   6262static void cik_enable_hdp_ls(struct radeon_device *rdev,
   6263			      bool enable)
   6264{
   6265	u32 orig, data;
   6266
   6267	orig = data = RREG32(HDP_MEM_POWER_LS);
   6268
   6269	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
   6270		data |= HDP_LS_ENABLE;
   6271	else
   6272		data &= ~HDP_LS_ENABLE;
   6273
   6274	if (orig != data)
   6275		WREG32(HDP_MEM_POWER_LS, data);
   6276}
   6277
/**
 * cik_update_cg - enable/disable clock gating for the requested blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable gating for the selected blocks
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* hold off gui-idle interrupts while gfx CG is reprogrammed */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed here on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
   6325
/* Enable clock gating at init: gfx first, then UVD internal CG (if
 * present), then the remaining non-gfx blocks in one call. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
   6340
/* Disable clock gating at teardown — reverse order of cik_init_cg():
 * non-gfx blocks first, gfx last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
   6351
   6352static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
   6353					  bool enable)
   6354{
   6355	u32 data, orig;
   6356
   6357	orig = data = RREG32(RLC_PG_CNTL);
   6358	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
   6359		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
   6360	else
   6361		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
   6362	if (orig != data)
   6363		WREG32(RLC_PG_CNTL, data);
   6364}
   6365
   6366static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
   6367					  bool enable)
   6368{
   6369	u32 data, orig;
   6370
   6371	orig = data = RREG32(RLC_PG_CNTL);
   6372	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
   6373		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
   6374	else
   6375		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
   6376	if (orig != data)
   6377		WREG32(RLC_PG_CNTL, data);
   6378}
   6379
   6380static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
   6381{
   6382	u32 data, orig;
   6383
   6384	orig = data = RREG32(RLC_PG_CNTL);
   6385	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
   6386		data &= ~DISABLE_CP_PG;
   6387	else
   6388		data |= DISABLE_CP_PG;
   6389	if (orig != data)
   6390		WREG32(RLC_PG_CNTL, data);
   6391}
   6392
   6393static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
   6394{
   6395	u32 data, orig;
   6396
   6397	orig = data = RREG32(RLC_PG_CNTL);
   6398	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
   6399		data &= ~DISABLE_GDS_PG;
   6400	else
   6401		data |= DISABLE_GDS_PG;
   6402	if (orig != data)
   6403		WREG32(RLC_PG_CNTL, data);
   6404}
   6405
/* Jump-table size/offsets (in dwords) inside the legacy CP microcode
 * images, used by cik_init_cp_pg_table() when rdev->new_fw is false. */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
   6409
/**
 * cik_init_cp_pg_table - copy the CP jump tables into the RLC cp_table BO
 *
 * @rdev: radeon_device pointer
 *
 * Packs the jump table of each CP microcode image (CE, PFP, ME, MEC,
 * plus MEC2 on KAVERI) back-to-back into rdev->rlc.cp_table_ptr.
 * New-style firmware carries the table offset/size in its header;
 * legacy firmware uses the fixed CP_*_TABLE_* constants above.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* KAVERI has a fifth table (MEC2) */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* select the firmware image for this CP engine:
			 * 0=CE, 1=PFP, 2=ME, 3=MEC, 4=MEC2 */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* copy this engine's jump table after the previous one */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware is big-endian dwords at fixed offsets;
			 * CE/PFP/ME share one offset, MEC uses another */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
   6493
/* Toggle coarse gfx power gating: GFX_PG_ENABLE in RLC_PG_CNTL plus
 * AUTO_PG_EN in RLC_AUTO_PG_CTRL. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the result of this read is discarded —
		 * presumably a posting read to flush the disable; confirm. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
   6523
/* Return a bitmap of the active CUs for the given shader engine /
 * shader array, limited to max_cu_per_sh bits. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* point GRBM at the requested se/sh for the reads, then restore
	 * broadcast mode */
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* keep only the upper 16 bits of the fuse (CC) register and merge
	 * in the user (GC_USER) register, then shift the combined field
	 * down; the final inversion below suggests these are disabled-CU
	 * bits */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of the low max_cu_per_sh bits */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
   6546
   6547static void cik_init_ao_cu_mask(struct radeon_device *rdev)
   6548{
   6549	u32 i, j, k, active_cu_number = 0;
   6550	u32 mask, counter, cu_bitmap;
   6551	u32 tmp = 0;
   6552
   6553	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
   6554		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
   6555			mask = 1;
   6556			cu_bitmap = 0;
   6557			counter = 0;
   6558			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
   6559				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
   6560					if (counter < 2)
   6561						cu_bitmap |= mask;
   6562					counter ++;
   6563				}
   6564				mask <<= 1;
   6565			}
   6566
   6567			active_cu_number += counter;
   6568			tmp |= (cu_bitmap << (i * 16 + j * 8));
   6569		}
   6570	}
   6571
   6572	WREG32(RLC_PG_AO_CU_MASK, tmp);
   6573
   6574	tmp = RREG32(RLC_MAX_PG_CU);
   6575	tmp &= ~MAX_PU_CU_MASK;
   6576	tmp |= MAX_PU_CU(active_cu_number);
   6577	WREG32(RLC_MAX_PG_CU, tmp);
   6578}
   6579
   6580static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
   6581				       bool enable)
   6582{
   6583	u32 data, orig;
   6584
   6585	orig = data = RREG32(RLC_PG_CNTL);
   6586	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
   6587		data |= STATIC_PER_CU_PG_ENABLE;
   6588	else
   6589		data &= ~STATIC_PER_CU_PG_ENABLE;
   6590	if (orig != data)
   6591		WREG32(RLC_PG_CNTL, data);
   6592}
   6593
   6594static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
   6595					bool enable)
   6596{
   6597	u32 data, orig;
   6598
   6599	orig = data = RREG32(RLC_PG_CNTL);
   6600	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
   6601		data |= DYN_PER_CU_PG_ENABLE;
   6602	else
   6603		data &= ~DYN_PER_CU_PG_ENABLE;
   6604	if (orig != data)
   6605		WREG32(RLC_PG_CNTL, data);
   6606}
   6607
/* RLC_GPM_SCRATCH_ADDR offsets written by cik_init_gfx_cgpg() below */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
   6610
/* One-time gfx power-gating setup: program the clear-state descriptor
 * and save/restore list into RLC scratch, set the base addresses, and
 * tune the PG delay/idle-poll parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor slots */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* GPU addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* tuning values below; their exact meanings are not visible here */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
   6659
/* Enable/disable all gfx power-gating modes together: coarse CGPG plus
 * static and dynamic per-CU MGPG. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
   6666
   6667u32 cik_get_csb_size(struct radeon_device *rdev)
   6668{
   6669	u32 count = 0;
   6670	const struct cs_section_def *sect = NULL;
   6671	const struct cs_extent_def *ext = NULL;
   6672
   6673	if (rdev->rlc.cs_data == NULL)
   6674		return 0;
   6675
   6676	/* begin clear state */
   6677	count += 2;
   6678	/* context control state */
   6679	count += 3;
   6680
   6681	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
   6682		for (ext = sect->section; ext->extent != NULL; ++ext) {
   6683			if (sect->id == SECT_CONTEXT)
   6684				count += 2 + ext->reg_count;
   6685			else
   6686				return 0;
   6687		}
   6688	}
   6689	/* pa_sc_raster_config/pa_sc_raster_config1 */
   6690	count += 4;
   6691	/* end clear state */
   6692	count += 2;
   6693	/* clear state */
   6694	count += 2;
   6695
   6696	return count;
   6697}
   6698
/**
 * cik_get_csb_buffer - fill a buffer with the clear-state packet stream
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (sized via cik_get_csb_size())
 *
 * Emits BEGIN_CLEAR_STATE and CONTEXT_CONTROL packets, all SECT_CONTEXT
 * register extents from rdev->rlc.cs_data, a per-ASIC
 * PA_SC_RASTER_CONFIG pair, then END_CLEAR_STATE and CLEAR_STATE.
 * All dwords are stored little-endian.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent; only SECT_CONTEXT
	 * sections are supported (bail out otherwise) */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-ASIC raster config values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
   6763
/* Enable power gating at init: SMU clock slowdown, then (if GFX_PG is
 * supported) the gfx PG setup plus CP/GDS PG, then the AO CU mask and
 * all gfx PG modes. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
   6778
/* Disable power gating at teardown: gfx PG modes first, then CP/GDS PG
 * if they were enabled. */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
   6789
   6790/*
   6791 * Interrupts
   6792 * Starting with r6xx, interrupts are handled via a ring buffer.
   6793 * Ring buffers are areas of GPU accessible memory that the GPU
   6794 * writes interrupt vectors into and the host reads vectors out of.
   6795 * There is a rptr (read pointer) that determines where the
   6796 * host is currently reading, and a wptr (write pointer)
   6797 * which determines where the GPU has written.  When the
   6798 * pointers are equal, the ring is idle.  When the GPU
   6799 * writes vectors to the ring buffer, it increments the
   6800 * wptr.  When there is an interrupt, the host then starts
   6801 * fetching commands and processing them until the pointers are
   6802 * equal again at which point it updates the rptr.
   6803 */
   6804
   6805/**
   6806 * cik_enable_interrupts - Enable the interrupt ring buffer
   6807 *
   6808 * @rdev: radeon_device pointer
   6809 *
   6810 * Enable the interrupt ring buffer (CIK).
   6811 */
   6812static void cik_enable_interrupts(struct radeon_device *rdev)
   6813{
   6814	u32 ih_cntl = RREG32(IH_CNTL);
   6815	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
   6816
   6817	ih_cntl |= ENABLE_INTR;
   6818	ih_rb_cntl |= IH_RB_ENABLE;
   6819	WREG32(IH_CNTL, ih_cntl);
   6820	WREG32(IH_RB_CNTL, ih_rb_cntl);
   6821	rdev->ih.enabled = true;
   6822}
   6823
   6824/**
   6825 * cik_disable_interrupts - Disable the interrupt ring buffer
   6826 *
   6827 * @rdev: radeon_device pointer
   6828 *
   6829 * Disable the interrupt ring buffer (CIK).
   6830 */
   6831static void cik_disable_interrupts(struct radeon_device *rdev)
   6832{
   6833	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
   6834	u32 ih_cntl = RREG32(IH_CNTL);
   6835
   6836	ih_rb_cntl &= ~IH_RB_ENABLE;
   6837	ih_cntl &= ~ENABLE_INTR;
   6838	WREG32(IH_RB_CNTL, ih_rb_cntl);
   6839	WREG32(IH_CNTL, ih_cntl);
   6840	/* set rptr, wptr to 0 */
   6841	WREG32(IH_RB_RPTR, 0);
   6842	WREG32(IH_RB_WPTR, 0);
   6843	rdev->ih.enabled = false;
   6844	rdev->ih.rptr = 0;
   6845}
   6846
   6847/**
   6848 * cik_disable_interrupt_state - Disable all interrupt sources
   6849 *
   6850 * @rdev: radeon_device pointer
   6851 *
   6852 * Clear all interrupt enable bits used by the driver (CIK).
   6853 */
   6854static void cik_disable_interrupt_state(struct radeon_device *rdev)
   6855{
   6856	u32 tmp;
   6857
   6858	/* gfx ring */
   6859	tmp = RREG32(CP_INT_CNTL_RING0) &
   6860		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   6861	WREG32(CP_INT_CNTL_RING0, tmp);
   6862	/* sdma */
   6863	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
   6864	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
   6865	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
   6866	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
   6867	/* compute queues */
   6868	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
   6869	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
   6870	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
   6871	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
   6872	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
   6873	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
   6874	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
   6875	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
   6876	/* grbm */
   6877	WREG32(GRBM_INT_CNTL, 0);
   6878	/* SRBM */
   6879	WREG32(SRBM_INT_CNTL, 0);
   6880	/* vline/vblank, etc. */
   6881	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
   6882	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
   6883	if (rdev->num_crtc >= 4) {
   6884		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
   6885		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
   6886	}
   6887	if (rdev->num_crtc >= 6) {
   6888		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
   6889		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
   6890	}
   6891	/* pflip */
   6892	if (rdev->num_crtc >= 2) {
   6893		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
   6894		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
   6895	}
   6896	if (rdev->num_crtc >= 4) {
   6897		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
   6898		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
   6899	}
   6900	if (rdev->num_crtc >= 6) {
   6901		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
   6902		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
   6903	}
   6904
   6905	/* dac hotplug */
   6906	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
   6907
   6908	/* digital hotplug */
   6909	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   6910	WREG32(DC_HPD1_INT_CONTROL, tmp);
   6911	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   6912	WREG32(DC_HPD2_INT_CONTROL, tmp);
   6913	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   6914	WREG32(DC_HPD3_INT_CONTROL, tmp);
   6915	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   6916	WREG32(DC_HPD4_INT_CONTROL, tmp);
   6917	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   6918	WREG32(DC_HPD5_INT_CONTROL, tmp);
   6919	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
   6920	WREG32(DC_HPD6_INT_CONTROL, tmp);
   6921
   6922}
   6923
   6924/**
   6925 * cik_irq_init - init and enable the interrupt ring
   6926 *
   6927 * @rdev: radeon_device pointer
   6928 *
   6929 * Allocate a ring buffer for the interrupt controller,
   6930 * enable the RLC, disable interrupts, enable the IH
   6931 * ring buffer and enable it (CIK).
   6932 * Called at device load and reume.
   6933 * Returns 0 for success, errors for failure.
   6934 */
   6935static int cik_irq_init(struct radeon_device *rdev)
   6936{
   6937	int ret = 0;
   6938	int rb_bufsz;
   6939	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
   6940
   6941	/* allocate ring */
   6942	ret = r600_ih_ring_alloc(rdev);
   6943	if (ret)
   6944		return ret;
   6945
   6946	/* disable irqs */
   6947	cik_disable_interrupts(rdev);
   6948
   6949	/* init rlc */
   6950	ret = cik_rlc_resume(rdev);
   6951	if (ret) {
   6952		r600_ih_ring_fini(rdev);
   6953		return ret;
   6954	}
   6955
   6956	/* setup interrupt control */
   6957	/* set dummy read address to dummy page address */
   6958	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
   6959	interrupt_cntl = RREG32(INTERRUPT_CNTL);
   6960	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
   6961	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
   6962	 */
   6963	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
   6964	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
   6965	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
   6966	WREG32(INTERRUPT_CNTL, interrupt_cntl);
   6967
   6968	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
   6969	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
   6970
   6971	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
   6972		      IH_WPTR_OVERFLOW_CLEAR |
   6973		      (rb_bufsz << 1));
   6974
   6975	if (rdev->wb.enabled)
   6976		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
   6977
   6978	/* set the writeback address whether it's enabled or not */
   6979	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
   6980	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
   6981
   6982	WREG32(IH_RB_CNTL, ih_rb_cntl);
   6983
   6984	/* set rptr, wptr to 0 */
   6985	WREG32(IH_RB_RPTR, 0);
   6986	WREG32(IH_RB_WPTR, 0);
   6987
   6988	/* Default settings for IH_CNTL (disabled at first) */
   6989	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
   6990	/* RPTR_REARM only works if msi's are enabled */
   6991	if (rdev->msi_enabled)
   6992		ih_cntl |= RPTR_REARM;
   6993	WREG32(IH_CNTL, ih_cntl);
   6994
   6995	/* force the active interrupt state to all disabled */
   6996	cik_disable_interrupt_state(rdev);
   6997
   6998	pci_set_master(rdev->pdev);
   6999
   7000	/* enable irqs */
   7001	cik_enable_interrupts(rdev);
   7002
   7003	return ret;
   7004}
   7005
   7006/**
   7007 * cik_irq_set - enable/disable interrupt sources
   7008 *
   7009 * @rdev: radeon_device pointer
   7010 *
   7011 * Enable interrupt sources on the GPU (vblanks, hpd,
   7012 * etc.) (CIK).
   7013 * Returns 0 for success, errors for failure.
   7014 */
   7015int cik_irq_set(struct radeon_device *rdev)
   7016{
   7017	u32 cp_int_cntl;
   7018	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
   7019	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
   7020	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
   7021	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
   7022	u32 grbm_int_cntl = 0;
   7023	u32 dma_cntl, dma_cntl1;
   7024
   7025	if (!rdev->irq.installed) {
   7026		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
   7027		return -EINVAL;
   7028	}
   7029	/* don't enable anything if the ih is disabled */
   7030	if (!rdev->ih.enabled) {
   7031		cik_disable_interrupts(rdev);
   7032		/* force the active interrupt state to all disabled */
   7033		cik_disable_interrupt_state(rdev);
   7034		return 0;
   7035	}
   7036
   7037	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
   7038		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
   7039	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
   7040
   7041	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7042	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7043	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7044	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7045	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7046	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
   7047
   7048	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
   7049	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
   7050
   7051	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7052	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7053	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7054	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7055	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7056	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7057	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7058	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
   7059
   7060	/* enable CP interrupts on all rings */
   7061	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
   7062		DRM_DEBUG("cik_irq_set: sw int gfx\n");
   7063		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
   7064	}
   7065	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
   7066		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
   7067		DRM_DEBUG("si_irq_set: sw int cp1\n");
   7068		if (ring->me == 1) {
   7069			switch (ring->pipe) {
   7070			case 0:
   7071				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
   7072				break;
   7073			case 1:
   7074				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
   7075				break;
   7076			case 2:
   7077				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
   7078				break;
   7079			case 3:
   7080				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
   7081				break;
   7082			default:
   7083				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
   7084				break;
   7085			}
   7086		} else if (ring->me == 2) {
   7087			switch (ring->pipe) {
   7088			case 0:
   7089				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
   7090				break;
   7091			case 1:
   7092				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
   7093				break;
   7094			case 2:
   7095				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
   7096				break;
   7097			case 3:
   7098				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
   7099				break;
   7100			default:
   7101				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
   7102				break;
   7103			}
   7104		} else {
   7105			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
   7106		}
   7107	}
   7108	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
   7109		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
   7110		DRM_DEBUG("si_irq_set: sw int cp2\n");
   7111		if (ring->me == 1) {
   7112			switch (ring->pipe) {
   7113			case 0:
   7114				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
   7115				break;
   7116			case 1:
   7117				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
   7118				break;
   7119			case 2:
   7120				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
   7121				break;
   7122			case 3:
   7123				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
   7124				break;
   7125			default:
   7126				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
   7127				break;
   7128			}
   7129		} else if (ring->me == 2) {
   7130			switch (ring->pipe) {
   7131			case 0:
   7132				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
   7133				break;
   7134			case 1:
   7135				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
   7136				break;
   7137			case 2:
   7138				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
   7139				break;
   7140			case 3:
   7141				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
   7142				break;
   7143			default:
   7144				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
   7145				break;
   7146			}
   7147		} else {
   7148			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
   7149		}
   7150	}
   7151
   7152	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
   7153		DRM_DEBUG("cik_irq_set: sw int dma\n");
   7154		dma_cntl |= TRAP_ENABLE;
   7155	}
   7156
   7157	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
   7158		DRM_DEBUG("cik_irq_set: sw int dma1\n");
   7159		dma_cntl1 |= TRAP_ENABLE;
   7160	}
   7161
   7162	if (rdev->irq.crtc_vblank_int[0] ||
   7163	    atomic_read(&rdev->irq.pflip[0])) {
   7164		DRM_DEBUG("cik_irq_set: vblank 0\n");
   7165		crtc1 |= VBLANK_INTERRUPT_MASK;
   7166	}
   7167	if (rdev->irq.crtc_vblank_int[1] ||
   7168	    atomic_read(&rdev->irq.pflip[1])) {
   7169		DRM_DEBUG("cik_irq_set: vblank 1\n");
   7170		crtc2 |= VBLANK_INTERRUPT_MASK;
   7171	}
   7172	if (rdev->irq.crtc_vblank_int[2] ||
   7173	    atomic_read(&rdev->irq.pflip[2])) {
   7174		DRM_DEBUG("cik_irq_set: vblank 2\n");
   7175		crtc3 |= VBLANK_INTERRUPT_MASK;
   7176	}
   7177	if (rdev->irq.crtc_vblank_int[3] ||
   7178	    atomic_read(&rdev->irq.pflip[3])) {
   7179		DRM_DEBUG("cik_irq_set: vblank 3\n");
   7180		crtc4 |= VBLANK_INTERRUPT_MASK;
   7181	}
   7182	if (rdev->irq.crtc_vblank_int[4] ||
   7183	    atomic_read(&rdev->irq.pflip[4])) {
   7184		DRM_DEBUG("cik_irq_set: vblank 4\n");
   7185		crtc5 |= VBLANK_INTERRUPT_MASK;
   7186	}
   7187	if (rdev->irq.crtc_vblank_int[5] ||
   7188	    atomic_read(&rdev->irq.pflip[5])) {
   7189		DRM_DEBUG("cik_irq_set: vblank 5\n");
   7190		crtc6 |= VBLANK_INTERRUPT_MASK;
   7191	}
   7192	if (rdev->irq.hpd[0]) {
   7193		DRM_DEBUG("cik_irq_set: hpd 1\n");
   7194		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7195	}
   7196	if (rdev->irq.hpd[1]) {
   7197		DRM_DEBUG("cik_irq_set: hpd 2\n");
   7198		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7199	}
   7200	if (rdev->irq.hpd[2]) {
   7201		DRM_DEBUG("cik_irq_set: hpd 3\n");
   7202		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7203	}
   7204	if (rdev->irq.hpd[3]) {
   7205		DRM_DEBUG("cik_irq_set: hpd 4\n");
   7206		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7207	}
   7208	if (rdev->irq.hpd[4]) {
   7209		DRM_DEBUG("cik_irq_set: hpd 5\n");
   7210		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7211	}
   7212	if (rdev->irq.hpd[5]) {
   7213		DRM_DEBUG("cik_irq_set: hpd 6\n");
   7214		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
   7215	}
   7216
   7217	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
   7218
   7219	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
   7220	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
   7221
   7222	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
   7223	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
   7224	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
   7225	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
   7226	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
   7227	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
   7228	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
   7229	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
   7230
   7231	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
   7232
   7233	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
   7234	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
   7235	if (rdev->num_crtc >= 4) {
   7236		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
   7237		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
   7238	}
   7239	if (rdev->num_crtc >= 6) {
   7240		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
   7241		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
   7242	}
   7243
   7244	if (rdev->num_crtc >= 2) {
   7245		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
   7246		       GRPH_PFLIP_INT_MASK);
   7247		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
   7248		       GRPH_PFLIP_INT_MASK);
   7249	}
   7250	if (rdev->num_crtc >= 4) {
   7251		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
   7252		       GRPH_PFLIP_INT_MASK);
   7253		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
   7254		       GRPH_PFLIP_INT_MASK);
   7255	}
   7256	if (rdev->num_crtc >= 6) {
   7257		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
   7258		       GRPH_PFLIP_INT_MASK);
   7259		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
   7260		       GRPH_PFLIP_INT_MASK);
   7261	}
   7262
   7263	WREG32(DC_HPD1_INT_CONTROL, hpd1);
   7264	WREG32(DC_HPD2_INT_CONTROL, hpd2);
   7265	WREG32(DC_HPD3_INT_CONTROL, hpd3);
   7266	WREG32(DC_HPD4_INT_CONTROL, hpd4);
   7267	WREG32(DC_HPD5_INT_CONTROL, hpd5);
   7268	WREG32(DC_HPD6_INT_CONTROL, hpd6);
   7269
   7270	/* posting read */
   7271	RREG32(SRBM_STATUS);
   7272
   7273	return 0;
   7274}
   7275
   7276/**
   7277 * cik_irq_ack - ack interrupt sources
   7278 *
   7279 * @rdev: radeon_device pointer
   7280 *
   7281 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
   7283 * generated and do not require an explicit ack.
   7284 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers into stat_regs so
	 * cik_irq_process() can consume them after this function acks the
	 * hardware sources
	 */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* latch per-crtc pageflip status; crtcs 3-6 only exist on some parts */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pageflip, vblank and vline interrupts on crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* same for crtc 2/3 if present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* same for crtc 4/5 if present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hotplug detect interrupts (read-modify-write the ack bit) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hpd RX (DP short-pulse) interrupts the same way */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
   7424
   7425/**
   7426 * cik_irq_disable - disable interrupts
   7427 *
   7428 * @rdev: radeon_device pointer
   7429 *
   7430 * Disable interrupts on the hw (CIK).
   7431 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop the IH controller first */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask every interrupt source */
	cik_disable_interrupt_state(rdev);
}
   7440
   7441/**
   7442 * cik_irq_suspend - disable interrupts for suspend
   7443 *
   7444 * @rdev: radeon_device pointer
   7445 *
   7446 * Disable interrupts and stop the RLC (CIK).
   7447 * Used for suspend.
   7448 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* mask and ack all interrupt sources */
	cik_irq_disable(rdev);
	/* halt the RLC so it stops generating interrupts */
	cik_rlc_stop(rdev);
}
   7454
   7455/**
   7456 * cik_irq_fini - tear down interrupt support
   7457 *
   7458 * @rdev: radeon_device pointer
   7459 *
   7460 * Disable interrupts on the hw and free the IH ring
   7461 * buffer (CIK).
   7462 * Used for driver unload.
   7463 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware before tearing down the ring */
	cik_irq_suspend(rdev);
	/* free the IH ring buffer */
	r600_ih_ring_fini(rdev);
}
   7469
   7470/**
   7471 * cik_get_ih_wptr - get the IH ring buffer wptr
   7472 *
   7473 * @rdev: radeon_device pointer
   7474 *
   7475 * Get the IH ring buffer wptr from either the register
   7476 * or the writeback memory buffer (CIK).  Also check for
   7477 * ring buffer overflow and deal with it.
   7478 * Used by cik_irq_process().
   7479 * Returns the value of the wptr.
   7480 */
   7481static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
   7482{
   7483	u32 wptr, tmp;
   7484
   7485	if (rdev->wb.enabled)
   7486		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
   7487	else
   7488		wptr = RREG32(IH_RB_WPTR);
   7489
   7490	if (wptr & RB_OVERFLOW) {
   7491		wptr &= ~RB_OVERFLOW;
   7492		/* When a ring buffer overflow happen start parsing interrupt
   7493		 * from the last not overwritten vector (wptr + 16). Hopefully
   7494		 * this should allow us to catchup.
   7495		 */
   7496		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
   7497			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
   7498		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
   7499		tmp = RREG32(IH_RB_CNTL);
   7500		tmp |= IH_WPTR_OVERFLOW_CLEAR;
   7501		WREG32(IH_RB_CNTL, tmp);
   7502	}
   7503	return (wptr & rdev->ih.ptr_mask);
   7504}
   7505
   7506/*        CIK IV Ring
   7507 * Each IV ring entry is 128 bits:
   7508 * [7:0]    - interrupt source id
   7509 * [31:8]   - reserved
   7510 * [59:32]  - interrupt source data
   7511 * [63:60]  - reserved
   7512 * [71:64]  - RINGID
   7513 *            CP:
   7514 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
   7515 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
   7516 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
   7517 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
   7518 *            PIPE_ID - ME0 0=3D
   7519 *                    - ME1&2 compute dispatcher (4 pipes each)
   7520 *            SDMA:
   7521 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
   7522 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
   7523 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
   7524 * [79:72]  - VMID
   7525 * [95:80]  - PASID
   7526 * [127:96] - reserved
   7527 */
   7528/**
   7529 * cik_irq_process - interrupt handler
   7530 *
   7531 * @rdev: radeon_device pointer
   7532 *
 * Interrupt handler (CIK).  Walk the IH ring,
   7534 * ack interrupts and schedule work to handle
   7535 * interrupt events.
   7536 * Returns irq process return code.
   7537 */
   7538int cik_irq_process(struct radeon_device *rdev)
   7539{
   7540	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
   7541	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
   7542	u32 wptr;
   7543	u32 rptr;
   7544	u32 src_id, src_data, ring_id;
   7545	u8 me_id, pipe_id, queue_id;
   7546	u32 ring_index;
   7547	bool queue_hotplug = false;
   7548	bool queue_dp = false;
   7549	bool queue_reset = false;
   7550	u32 addr, status, mc_client;
   7551	bool queue_thermal = false;
   7552
   7553	if (!rdev->ih.enabled || rdev->shutdown)
   7554		return IRQ_NONE;
   7555
   7556	wptr = cik_get_ih_wptr(rdev);
   7557
   7558restart_ih:
   7559	/* is somebody else already processing irqs? */
   7560	if (atomic_xchg(&rdev->ih.lock, 1))
   7561		return IRQ_NONE;
   7562
   7563	rptr = rdev->ih.rptr;
   7564	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
   7565
   7566	/* Order reading of wptr vs. reading of IH ring data */
   7567	rmb();
   7568
   7569	/* display interrupts */
   7570	cik_irq_ack(rdev);
   7571
   7572	while (rptr != wptr) {
   7573		/* wptr/rptr are in bytes! */
   7574		ring_index = rptr / 4;
   7575
   7576		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
   7577		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
   7578		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
   7579
   7580		switch (src_id) {
   7581		case 1: /* D1 vblank/vline */
   7582			switch (src_data) {
   7583			case 0: /* D1 vblank */
   7584				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
   7585					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7586
   7587				if (rdev->irq.crtc_vblank_int[0]) {
   7588					drm_handle_vblank(rdev->ddev, 0);
   7589					rdev->pm.vblank_sync = true;
   7590					wake_up(&rdev->irq.vblank_queue);
   7591				}
   7592				if (atomic_read(&rdev->irq.pflip[0]))
   7593					radeon_crtc_handle_vblank(rdev, 0);
   7594				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
   7595				DRM_DEBUG("IH: D1 vblank\n");
   7596
   7597				break;
   7598			case 1: /* D1 vline */
   7599				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
   7600					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7601
   7602				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
   7603				DRM_DEBUG("IH: D1 vline\n");
   7604
   7605				break;
   7606			default:
   7607				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7608				break;
   7609			}
   7610			break;
   7611		case 2: /* D2 vblank/vline */
   7612			switch (src_data) {
   7613			case 0: /* D2 vblank */
   7614				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
   7615					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7616
   7617				if (rdev->irq.crtc_vblank_int[1]) {
   7618					drm_handle_vblank(rdev->ddev, 1);
   7619					rdev->pm.vblank_sync = true;
   7620					wake_up(&rdev->irq.vblank_queue);
   7621				}
   7622				if (atomic_read(&rdev->irq.pflip[1]))
   7623					radeon_crtc_handle_vblank(rdev, 1);
   7624				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
   7625				DRM_DEBUG("IH: D2 vblank\n");
   7626
   7627				break;
   7628			case 1: /* D2 vline */
   7629				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
   7630					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7631
   7632				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
   7633				DRM_DEBUG("IH: D2 vline\n");
   7634
   7635				break;
   7636			default:
   7637				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7638				break;
   7639			}
   7640			break;
   7641		case 3: /* D3 vblank/vline */
   7642			switch (src_data) {
   7643			case 0: /* D3 vblank */
   7644				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
   7645					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7646
   7647				if (rdev->irq.crtc_vblank_int[2]) {
   7648					drm_handle_vblank(rdev->ddev, 2);
   7649					rdev->pm.vblank_sync = true;
   7650					wake_up(&rdev->irq.vblank_queue);
   7651				}
   7652				if (atomic_read(&rdev->irq.pflip[2]))
   7653					radeon_crtc_handle_vblank(rdev, 2);
   7654				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
   7655				DRM_DEBUG("IH: D3 vblank\n");
   7656
   7657				break;
   7658			case 1: /* D3 vline */
   7659				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
   7660					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7661
   7662				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
   7663				DRM_DEBUG("IH: D3 vline\n");
   7664
   7665				break;
   7666			default:
   7667				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7668				break;
   7669			}
   7670			break;
   7671		case 4: /* D4 vblank/vline */
   7672			switch (src_data) {
   7673			case 0: /* D4 vblank */
   7674				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
   7675					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7676
   7677				if (rdev->irq.crtc_vblank_int[3]) {
   7678					drm_handle_vblank(rdev->ddev, 3);
   7679					rdev->pm.vblank_sync = true;
   7680					wake_up(&rdev->irq.vblank_queue);
   7681				}
   7682				if (atomic_read(&rdev->irq.pflip[3]))
   7683					radeon_crtc_handle_vblank(rdev, 3);
   7684				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
   7685				DRM_DEBUG("IH: D4 vblank\n");
   7686
   7687				break;
   7688			case 1: /* D4 vline */
   7689				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
   7690					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7691
   7692				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
   7693				DRM_DEBUG("IH: D4 vline\n");
   7694
   7695				break;
   7696			default:
   7697				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7698				break;
   7699			}
   7700			break;
   7701		case 5: /* D5 vblank/vline */
   7702			switch (src_data) {
   7703			case 0: /* D5 vblank */
   7704				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
   7705					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7706
   7707				if (rdev->irq.crtc_vblank_int[4]) {
   7708					drm_handle_vblank(rdev->ddev, 4);
   7709					rdev->pm.vblank_sync = true;
   7710					wake_up(&rdev->irq.vblank_queue);
   7711				}
   7712				if (atomic_read(&rdev->irq.pflip[4]))
   7713					radeon_crtc_handle_vblank(rdev, 4);
   7714				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
   7715				DRM_DEBUG("IH: D5 vblank\n");
   7716
   7717				break;
   7718			case 1: /* D5 vline */
   7719				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
   7720					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7721
   7722				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
   7723				DRM_DEBUG("IH: D5 vline\n");
   7724
   7725				break;
   7726			default:
   7727				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7728				break;
   7729			}
   7730			break;
   7731		case 6: /* D6 vblank/vline */
   7732			switch (src_data) {
   7733			case 0: /* D6 vblank */
   7734				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
   7735					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7736
   7737				if (rdev->irq.crtc_vblank_int[5]) {
   7738					drm_handle_vblank(rdev->ddev, 5);
   7739					rdev->pm.vblank_sync = true;
   7740					wake_up(&rdev->irq.vblank_queue);
   7741				}
   7742				if (atomic_read(&rdev->irq.pflip[5]))
   7743					radeon_crtc_handle_vblank(rdev, 5);
   7744				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
   7745				DRM_DEBUG("IH: D6 vblank\n");
   7746
   7747				break;
   7748			case 1: /* D6 vline */
   7749				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
   7750					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7751
   7752				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
   7753				DRM_DEBUG("IH: D6 vline\n");
   7754
   7755				break;
   7756			default:
   7757				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7758				break;
   7759			}
   7760			break;
   7761		case 8: /* D1 page flip */
   7762		case 10: /* D2 page flip */
   7763		case 12: /* D3 page flip */
   7764		case 14: /* D4 page flip */
   7765		case 16: /* D5 page flip */
   7766		case 18: /* D6 page flip */
   7767			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
   7768			if (radeon_use_pflipirq > 0)
   7769				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
   7770			break;
   7771		case 42: /* HPD hotplug */
   7772			switch (src_data) {
   7773			case 0:
   7774				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
   7775					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7776
   7777				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
   7778				queue_hotplug = true;
   7779				DRM_DEBUG("IH: HPD1\n");
   7780
   7781				break;
   7782			case 1:
   7783				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
   7784					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7785
   7786				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
   7787				queue_hotplug = true;
   7788				DRM_DEBUG("IH: HPD2\n");
   7789
   7790				break;
   7791			case 2:
   7792				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
   7793					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7794
   7795				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
   7796				queue_hotplug = true;
   7797				DRM_DEBUG("IH: HPD3\n");
   7798
   7799				break;
   7800			case 3:
   7801				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
   7802					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7803
   7804				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
   7805				queue_hotplug = true;
   7806				DRM_DEBUG("IH: HPD4\n");
   7807
   7808				break;
   7809			case 4:
   7810				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
   7811					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7812
   7813				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
   7814				queue_hotplug = true;
   7815				DRM_DEBUG("IH: HPD5\n");
   7816
   7817				break;
   7818			case 5:
   7819				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
   7820					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7821
   7822				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
   7823				queue_hotplug = true;
   7824				DRM_DEBUG("IH: HPD6\n");
   7825
   7826				break;
   7827			case 6:
   7828				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
   7829					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7830
   7831				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
   7832				queue_dp = true;
   7833				DRM_DEBUG("IH: HPD_RX 1\n");
   7834
   7835				break;
   7836			case 7:
   7837				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
   7838					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7839
   7840				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
   7841				queue_dp = true;
   7842				DRM_DEBUG("IH: HPD_RX 2\n");
   7843
   7844				break;
   7845			case 8:
   7846				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
   7847					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7848
   7849				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
   7850				queue_dp = true;
   7851				DRM_DEBUG("IH: HPD_RX 3\n");
   7852
   7853				break;
   7854			case 9:
   7855				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
   7856					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7857
   7858				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
   7859				queue_dp = true;
   7860				DRM_DEBUG("IH: HPD_RX 4\n");
   7861
   7862				break;
   7863			case 10:
   7864				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
   7865					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7866
   7867				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
   7868				queue_dp = true;
   7869				DRM_DEBUG("IH: HPD_RX 5\n");
   7870
   7871				break;
   7872			case 11:
   7873				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
   7874					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
   7875
   7876				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
   7877				queue_dp = true;
   7878				DRM_DEBUG("IH: HPD_RX 6\n");
   7879
   7880				break;
   7881			default:
   7882				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   7883				break;
   7884			}
   7885			break;
   7886		case 96:
   7887			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
   7888			WREG32(SRBM_INT_ACK, 0x1);
   7889			break;
   7890		case 124: /* UVD */
   7891			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
   7892			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
   7893			break;
   7894		case 146:
   7895		case 147:
   7896			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
   7897			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
   7898			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
   7899			/* reset addr and status */
   7900			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
   7901			if (addr == 0x0 && status == 0x0)
   7902				break;
   7903			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
   7904			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
   7905				addr);
   7906			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
   7907				status);
   7908			cik_vm_decode_fault(rdev, status, addr, mc_client);
   7909			break;
   7910		case 167: /* VCE */
   7911			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
   7912			switch (src_data) {
   7913			case 0:
   7914				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
   7915				break;
   7916			case 1:
   7917				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
   7918				break;
   7919			default:
   7920				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
   7921				break;
   7922			}
   7923			break;
   7924		case 176: /* GFX RB CP_INT */
   7925		case 177: /* GFX IB CP_INT */
   7926			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
   7927			break;
   7928		case 181: /* CP EOP event */
   7929			DRM_DEBUG("IH: CP EOP\n");
   7930			/* XXX check the bitfield order! */
   7931			me_id = (ring_id & 0x60) >> 5;
   7932			pipe_id = (ring_id & 0x18) >> 3;
   7933			queue_id = (ring_id & 0x7) >> 0;
   7934			switch (me_id) {
   7935			case 0:
   7936				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
   7937				break;
   7938			case 1:
   7939			case 2:
   7940				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
   7941					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
   7942				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
   7943					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
   7944				break;
   7945			}
   7946			break;
   7947		case 184: /* CP Privileged reg access */
   7948			DRM_ERROR("Illegal register access in command stream\n");
   7949			/* XXX check the bitfield order! */
   7950			me_id = (ring_id & 0x60) >> 5;
   7951			switch (me_id) {
   7952			case 0:
   7953				/* This results in a full GPU reset, but all we need to do is soft
   7954				 * reset the CP for gfx
   7955				 */
   7956				queue_reset = true;
   7957				break;
   7958			case 1:
   7959				/* XXX compute */
   7960				queue_reset = true;
   7961				break;
   7962			case 2:
   7963				/* XXX compute */
   7964				queue_reset = true;
   7965				break;
   7966			}
   7967			break;
   7968		case 185: /* CP Privileged inst */
   7969			DRM_ERROR("Illegal instruction in command stream\n");
   7970			/* XXX check the bitfield order! */
   7971			me_id = (ring_id & 0x60) >> 5;
   7972			switch (me_id) {
   7973			case 0:
   7974				/* This results in a full GPU reset, but all we need to do is soft
   7975				 * reset the CP for gfx
   7976				 */
   7977				queue_reset = true;
   7978				break;
   7979			case 1:
   7980				/* XXX compute */
   7981				queue_reset = true;
   7982				break;
   7983			case 2:
   7984				/* XXX compute */
   7985				queue_reset = true;
   7986				break;
   7987			}
   7988			break;
   7989		case 224: /* SDMA trap event */
   7990			/* XXX check the bitfield order! */
   7991			me_id = (ring_id & 0x3) >> 0;
   7992			queue_id = (ring_id & 0xc) >> 2;
   7993			DRM_DEBUG("IH: SDMA trap\n");
   7994			switch (me_id) {
   7995			case 0:
   7996				switch (queue_id) {
   7997				case 0:
   7998					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
   7999					break;
   8000				case 1:
   8001					/* XXX compute */
   8002					break;
   8003				case 2:
   8004					/* XXX compute */
   8005					break;
   8006				}
   8007				break;
   8008			case 1:
   8009				switch (queue_id) {
   8010				case 0:
   8011					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
   8012					break;
   8013				case 1:
   8014					/* XXX compute */
   8015					break;
   8016				case 2:
   8017					/* XXX compute */
   8018					break;
   8019				}
   8020				break;
   8021			}
   8022			break;
   8023		case 230: /* thermal low to high */
   8024			DRM_DEBUG("IH: thermal low to high\n");
   8025			rdev->pm.dpm.thermal.high_to_low = false;
   8026			queue_thermal = true;
   8027			break;
   8028		case 231: /* thermal high to low */
   8029			DRM_DEBUG("IH: thermal high to low\n");
   8030			rdev->pm.dpm.thermal.high_to_low = true;
   8031			queue_thermal = true;
   8032			break;
   8033		case 233: /* GUI IDLE */
   8034			DRM_DEBUG("IH: GUI idle\n");
   8035			break;
   8036		case 241: /* SDMA Privileged inst */
   8037		case 247: /* SDMA Privileged inst */
   8038			DRM_ERROR("Illegal instruction in SDMA command stream\n");
   8039			/* XXX check the bitfield order! */
   8040			me_id = (ring_id & 0x3) >> 0;
   8041			queue_id = (ring_id & 0xc) >> 2;
   8042			switch (me_id) {
   8043			case 0:
   8044				switch (queue_id) {
   8045				case 0:
   8046					queue_reset = true;
   8047					break;
   8048				case 1:
   8049					/* XXX compute */
   8050					queue_reset = true;
   8051					break;
   8052				case 2:
   8053					/* XXX compute */
   8054					queue_reset = true;
   8055					break;
   8056				}
   8057				break;
   8058			case 1:
   8059				switch (queue_id) {
   8060				case 0:
   8061					queue_reset = true;
   8062					break;
   8063				case 1:
   8064					/* XXX compute */
   8065					queue_reset = true;
   8066					break;
   8067				case 2:
   8068					/* XXX compute */
   8069					queue_reset = true;
   8070					break;
   8071				}
   8072				break;
   8073			}
   8074			break;
   8075		default:
   8076			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
   8077			break;
   8078		}
   8079
   8080		/* wptr/rptr are in bytes! */
   8081		rptr += 16;
   8082		rptr &= rdev->ih.ptr_mask;
   8083		WREG32(IH_RB_RPTR, rptr);
   8084	}
   8085	if (queue_dp)
   8086		schedule_work(&rdev->dp_work);
   8087	if (queue_hotplug)
   8088		schedule_delayed_work(&rdev->hotplug_work, 0);
   8089	if (queue_reset) {
   8090		rdev->needs_reset = true;
   8091		wake_up_all(&rdev->fence_queue);
   8092	}
   8093	if (queue_thermal)
   8094		schedule_work(&rdev->pm.dpm.thermal.work);
   8095	rdev->ih.rptr = rptr;
   8096	atomic_set(&rdev->ih.lock, 0);
   8097
   8098	/* make sure wptr hasn't changed while processing */
   8099	wptr = cik_get_ih_wptr(rdev);
   8100	if (wptr != rptr)
   8101		goto restart_ih;
   8102
   8103	return IRQ_HANDLED;
   8104}
   8105
   8106/*
   8107 * startup/shutdown callbacks
   8108 */
   8109static void cik_uvd_init(struct radeon_device *rdev)
   8110{
   8111	int r;
   8112
   8113	if (!rdev->has_uvd)
   8114		return;
   8115
   8116	r = radeon_uvd_init(rdev);
   8117	if (r) {
   8118		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
   8119		/*
   8120		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
   8121		 * to early fails cik_uvd_start() and thus nothing happens
   8122		 * there. So it is pointless to try to go through that code
   8123		 * hence why we disable uvd here.
   8124		 */
   8125		rdev->has_uvd = false;
   8126		return;
   8127	}
   8128	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
   8129	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
   8130}
   8131
/**
 * cik_uvd_start - hardware init for UVD
 *
 * @rdev: radeon_device pointer
 *
 * Resume the UVD block and start its fence ring.  On any failure the
 * UVD ring size is zeroed, which makes cik_uvd_resume() a no-op, so a
 * broken UVD block does not prevent the rest of the asic from coming up.
 */
static void cik_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	/* version-specific (UVD 4.2) hw resume */
	r = uvd_v4_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	/* disable the ring so cik_uvd_resume() skips it */
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
   8159
/**
 * cik_uvd_resume - set up the UVD ring
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the UVD ring buffer and the UVD block itself.
 * Skipped when UVD is absent or when cik_uvd_start() failed
 * (which zeroes the ring size).  Errors are logged but not
 * propagated; UVD is simply left non-functional.
 */
static void cik_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}
   8180
   8181static void cik_vce_init(struct radeon_device *rdev)
   8182{
   8183	int r;
   8184
   8185	if (!rdev->has_vce)
   8186		return;
   8187
   8188	r = radeon_vce_init(rdev);
   8189	if (r) {
   8190		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
   8191		/*
   8192		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
   8193		 * to early fails cik_vce_start() and thus nothing happens
   8194		 * there. So it is pointless to try to go through that code
   8195		 * hence why we disable vce here.
   8196		 */
   8197		rdev->has_vce = false;
   8198		return;
   8199	}
   8200	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
   8201	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
   8202	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
   8203	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
   8204}
   8205
   8206static void cik_vce_start(struct radeon_device *rdev)
   8207{
   8208	int r;
   8209
   8210	if (!rdev->has_vce)
   8211		return;
   8212
   8213	r = radeon_vce_resume(rdev);
   8214	if (r) {
   8215		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
   8216		goto error;
   8217	}
   8218	r = vce_v2_0_resume(rdev);
   8219	if (r) {
   8220		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
   8221		goto error;
   8222	}
   8223	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
   8224	if (r) {
   8225		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
   8226		goto error;
   8227	}
   8228	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
   8229	if (r) {
   8230		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
   8231		goto error;
   8232	}
   8233	return;
   8234
   8235error:
   8236	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
   8237	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
   8238}
   8239
   8240static void cik_vce_resume(struct radeon_device *rdev)
   8241{
   8242	struct radeon_ring *ring;
   8243	int r;
   8244
   8245	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
   8246		return;
   8247
   8248	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
   8249	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
   8250	if (r) {
   8251		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
   8252		return;
   8253	}
   8254	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
   8255	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
   8256	if (r) {
   8257		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
   8258		return;
   8259	}
   8260	r = vce_v1_0_init(rdev);
   8261	if (r) {
   8262		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
   8263		return;
   8264	}
   8265}
   8266
   8267/**
   8268 * cik_startup - program the asic to a functional state
   8269 *
   8270 * @rdev: radeon_device pointer
   8271 *
   8272 * Programs the asic to a functional state (CIK).
   8273 * Called by cik_init() and cik_resume().
   8274 * Returns 0 for success, error for failure.
   8275 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* only load MC firmware here on dGPUs without dpm enabled;
	 * NOTE(review): presumably the dpm path loads it itself - confirm.
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* Kaveri and Kabini/Mullins use different save/restore lists */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on all five rings: gfx, 2x compute, 2x sdma */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* pick the nop packet for ring padding: old Hawaii firmware still
	 * uses type-2 packets, everything else uses a type-3 NOP
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* uvd/vce failures are non-fatal; errors were logged in the helpers */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
   8468
   8469/**
   8470 * cik_resume - resume the asic to a functional state
   8471 *
   8472 * @rdev: radeon_device pointer
   8473 *
   8474 * Programs the asic to a functional state (CIK).
   8475 * Called at resume.
   8476 * Returns 0 for success, error for failure.
   8477 */
   8478int cik_resume(struct radeon_device *rdev)
   8479{
   8480	int r;
   8481
   8482	/* post card */
   8483	atom_asic_init(rdev->mode_info.atom_context);
   8484
   8485	/* init golden registers */
   8486	cik_init_golden_registers(rdev);
   8487
   8488	if (rdev->pm.pm_method == PM_METHOD_DPM)
   8489		radeon_pm_resume(rdev);
   8490
   8491	rdev->accel_working = true;
   8492	r = cik_startup(rdev);
   8493	if (r) {
   8494		DRM_ERROR("cik startup failed on resume\n");
   8495		rdev->accel_working = false;
   8496		return r;
   8497	}
   8498
   8499	return r;
   8500
   8501}
   8502
   8503/**
   8504 * cik_suspend - suspend the asic
   8505 *
   8506 * @rdev: radeon_device pointer
   8507 *
   8508 * Bring the chip into a state suitable for suspend (CIK).
   8509 * Called at suspend.
   8510 * Returns 0 for success.
   8511 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		radeon_uvd_suspend(rdev);
		uvd_v1_0_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down PG/CG before disabling interrupts and the GART */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
   8532
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions. This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
   8539/**
   8540 * cik_init - asic specific driver and hw init
   8541 *
   8542 * @rdev: radeon_device pointer
   8543 *
   8544 * Setup asic specific driver variables and program the hw
   8545 * to a functional state (CIK).
   8546 * Called at driver startup.
   8547 * Returns 0 for success, errors for failure.
   8548 */
   8549int cik_init(struct radeon_device *rdev)
   8550{
   8551	struct radeon_ring *ring;
   8552	int r;
   8553
   8554	/* Read BIOS */
   8555	if (!radeon_get_bios(rdev)) {
   8556		if (ASIC_IS_AVIVO(rdev))
   8557			return -EINVAL;
   8558	}
   8559	/* Must be an ATOMBIOS */
   8560	if (!rdev->is_atom_bios) {
   8561		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
   8562		return -EINVAL;
   8563	}
   8564	r = radeon_atombios_init(rdev);
   8565	if (r)
   8566		return r;
   8567
   8568	/* Post card if necessary */
   8569	if (!radeon_card_posted(rdev)) {
   8570		if (!rdev->bios) {
   8571			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
   8572			return -EINVAL;
   8573		}
   8574		DRM_INFO("GPU not posted. posting now...\n");
   8575		atom_asic_init(rdev->mode_info.atom_context);
   8576	}
   8577	/* init golden registers */
   8578	cik_init_golden_registers(rdev);
   8579	/* Initialize scratch registers */
   8580	cik_scratch_init(rdev);
   8581	/* Initialize surface registers */
   8582	radeon_surface_init(rdev);
   8583	/* Initialize clocks */
   8584	radeon_get_clock_info(rdev->ddev);
   8585
   8586	/* Fence driver */
   8587	radeon_fence_driver_init(rdev);
   8588
   8589	/* initialize memory controller */
   8590	r = cik_mc_init(rdev);
   8591	if (r)
   8592		return r;
   8593	/* Memory manager */
   8594	r = radeon_bo_init(rdev);
   8595	if (r)
   8596		return r;
   8597
   8598	if (rdev->flags & RADEON_IS_IGP) {
   8599		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
   8600		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
   8601			r = cik_init_microcode(rdev);
   8602			if (r) {
   8603				DRM_ERROR("Failed to load firmware!\n");
   8604				return r;
   8605			}
   8606		}
   8607	} else {
   8608		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
   8609		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
   8610		    !rdev->mc_fw) {
   8611			r = cik_init_microcode(rdev);
   8612			if (r) {
   8613				DRM_ERROR("Failed to load firmware!\n");
   8614				return r;
   8615			}
   8616		}
   8617	}
   8618
   8619	/* Initialize power management */
   8620	radeon_pm_init(rdev);
   8621
   8622	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   8623	ring->ring_obj = NULL;
   8624	r600_ring_init(rdev, ring, 1024 * 1024);
   8625
   8626	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
   8627	ring->ring_obj = NULL;
   8628	r600_ring_init(rdev, ring, 1024 * 1024);
   8629	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
   8630	if (r)
   8631		return r;
   8632
   8633	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
   8634	ring->ring_obj = NULL;
   8635	r600_ring_init(rdev, ring, 1024 * 1024);
   8636	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
   8637	if (r)
   8638		return r;
   8639
   8640	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
   8641	ring->ring_obj = NULL;
   8642	r600_ring_init(rdev, ring, 256 * 1024);
   8643
   8644	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
   8645	ring->ring_obj = NULL;
   8646	r600_ring_init(rdev, ring, 256 * 1024);
   8647
   8648	cik_uvd_init(rdev);
   8649	cik_vce_init(rdev);
   8650
   8651	rdev->ih.ring_obj = NULL;
   8652	r600_ih_ring_init(rdev, 64 * 1024);
   8653
   8654	r = r600_pcie_gart_init(rdev);
   8655	if (r)
   8656		return r;
   8657
   8658	rdev->accel_working = true;
   8659	r = cik_startup(rdev);
   8660	if (r) {
   8661		dev_err(rdev->dev, "disabling GPU acceleration\n");
   8662		cik_cp_fini(rdev);
   8663		cik_sdma_fini(rdev);
   8664		cik_irq_fini(rdev);
   8665		sumo_rlc_fini(rdev);
   8666		cik_mec_fini(rdev);
   8667		radeon_wb_fini(rdev);
   8668		radeon_ib_pool_fini(rdev);
   8669		radeon_vm_manager_fini(rdev);
   8670		radeon_irq_kms_fini(rdev);
   8671		cik_pcie_gart_fini(rdev);
   8672		rdev->accel_working = false;
   8673	}
   8674
   8675	/* Don't start up if the MC ucode is missing.
   8676	 * The default clocks and voltages before the MC ucode
   8677	 * is loaded are not suffient for advanced operations.
   8678	 */
   8679	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
   8680		DRM_ERROR("radeon: MC ucode required for NI+.\n");
   8681		return -EINVAL;
   8682	}
   8683
   8684	return 0;
   8685}
   8686
   8687/**
   8688 * cik_fini - asic specific driver and hw fini
   8689 *
   8690 * @rdev: radeon_device pointer
   8691 *
   8692 * Tear down the asic specific driver variables and program the hw
   8693 * to an idle state (CIK).
   8694 * Called at driver unload.
   8695 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop and free the CP/SDMA engines before the shared buffers */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS copy was kmalloc'ed in radeon_get_bios(); release it last */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
   8722
   8723void dce8_program_fmt(struct drm_encoder *encoder)
   8724{
   8725	struct drm_device *dev = encoder->dev;
   8726	struct radeon_device *rdev = dev->dev_private;
   8727	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
   8728	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
   8729	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
   8730	int bpc = 0;
   8731	u32 tmp = 0;
   8732	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
   8733
   8734	if (connector) {
   8735		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
   8736		bpc = radeon_get_monitor_bpc(connector);
   8737		dither = radeon_connector->dither;
   8738	}
   8739
   8740	/* LVDS/eDP FMT is set up by atom */
   8741	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
   8742		return;
   8743
   8744	/* not needed for analog */
   8745	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
   8746	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
   8747		return;
   8748
   8749	if (bpc == 0)
   8750		return;
   8751
   8752	switch (bpc) {
   8753	case 6:
   8754		if (dither == RADEON_FMT_DITHER_ENABLE)
   8755			/* XXX sort out optimal dither settings */
   8756			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
   8757				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
   8758		else
   8759			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
   8760		break;
   8761	case 8:
   8762		if (dither == RADEON_FMT_DITHER_ENABLE)
   8763			/* XXX sort out optimal dither settings */
   8764			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
   8765				FMT_RGB_RANDOM_ENABLE |
   8766				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
   8767		else
   8768			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
   8769		break;
   8770	case 10:
   8771		if (dither == RADEON_FMT_DITHER_ENABLE)
   8772			/* XXX sort out optimal dither settings */
   8773			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
   8774				FMT_RGB_RANDOM_ENABLE |
   8775				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
   8776		else
   8777			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
   8778		break;
   8779	default:
   8780		/* not needed */
   8781		break;
   8782	}
   8783
   8784	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
   8785}
   8786
   8787/* display watermark setup */
   8788/**
   8789 * dce8_line_buffer_adjust - Set up the line buffer
   8790 *
   8791 * @rdev: radeon_device pointer
   8792 * @radeon_crtc: the selected display controller
   8793 * @mode: the current display mode on the selected display
   8794 * controller
   8795 *
   8796 * Setup up the line buffer allocation for
   8797 * the selected display controller (CIK).
   8798 * Returns the line buffer size in pixels.
   8799 */
   8800static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
   8801				   struct radeon_crtc *radeon_crtc,
   8802				   struct drm_display_mode *mode)
   8803{
   8804	u32 tmp, buffer_alloc, i;
   8805	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
   8806	/*
   8807	 * Line Buffer Setup
   8808	 * There are 6 line buffers, one for each display controllers.
   8809	 * There are 3 partitions per LB. Select the number of partitions
   8810	 * to enable based on the display width.  For display widths larger
   8811	 * than 4096, you need use to use 2 display controllers and combine
   8812	 * them using the stereo blender.
   8813	 */
   8814	if (radeon_crtc->base.enabled && mode) {
   8815		if (mode->crtc_hdisplay < 1920) {
   8816			tmp = 1;
   8817			buffer_alloc = 2;
   8818		} else if (mode->crtc_hdisplay < 2560) {
   8819			tmp = 2;
   8820			buffer_alloc = 2;
   8821		} else if (mode->crtc_hdisplay < 4096) {
   8822			tmp = 0;
   8823			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
   8824		} else {
   8825			DRM_DEBUG_KMS("Mode too big for LB!\n");
   8826			tmp = 0;
   8827			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
   8828		}
   8829	} else {
   8830		tmp = 1;
   8831		buffer_alloc = 0;
   8832	}
   8833
   8834	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
   8835	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
   8836
   8837	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
   8838	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
   8839	for (i = 0; i < rdev->usec_timeout; i++) {
   8840		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
   8841		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
   8842			break;
   8843		udelay(1);
   8844	}
   8845
   8846	if (radeon_crtc->base.enabled && mode) {
   8847		switch (tmp) {
   8848		case 0:
   8849		default:
   8850			return 4096 * 2;
   8851		case 1:
   8852			return 1920 * 2;
   8853		case 2:
   8854			return 2560 * 2;
   8855		}
   8856	}
   8857
   8858	/* controller not enabled, so no lb used */
   8859	return 0;
   8860}
   8861
   8862/**
   8863 * cik_get_number_of_dram_channels - get the number of dram channels
   8864 *
   8865 * @rdev: radeon_device pointer
   8866 *
   8867 * Look up the number of video ram channels (CIK).
   8868 * Used for display watermark bandwidth calculations
   8869 * Returns the number of dram channels
   8870 */
   8871static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
   8872{
   8873	u32 tmp = RREG32(MC_SHARED_CHMAP);
   8874
   8875	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
   8876	case 0:
   8877	default:
   8878		return 1;
   8879	case 1:
   8880		return 2;
   8881	case 2:
   8882		return 4;
   8883	case 3:
   8884		return 8;
   8885	case 4:
   8886		return 3;
   8887	case 5:
   8888		return 6;
   8889	case 6:
   8890		return 10;
   8891	case 7:
   8892		return 12;
   8893	case 8:
   8894		return 16;
   8895	}
   8896}
   8897
/* Per-head input parameters for the dce8 display watermark calculations. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
   8913
   8914/**
   8915 * dce8_dram_bandwidth - get the dram bandwidth
   8916 *
   8917 * @wm: watermark calculation data
   8918 *
   8919 * Calculate the raw dram bandwidth (CIK).
   8920 * Used for display watermark bandwidth calculations
   8921 * Returns the dram bandwidth in MBytes/s
   8922 */
   8923static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
   8924{
   8925	/* Calculate raw DRAM Bandwidth */
   8926	fixed20_12 dram_efficiency; /* 0.7 */
   8927	fixed20_12 yclk, dram_channels, bandwidth;
   8928	fixed20_12 a;
   8929
   8930	a.full = dfixed_const(1000);
   8931	yclk.full = dfixed_const(wm->yclk);
   8932	yclk.full = dfixed_div(yclk, a);
   8933	dram_channels.full = dfixed_const(wm->dram_channels * 4);
   8934	a.full = dfixed_const(10);
   8935	dram_efficiency.full = dfixed_const(7);
   8936	dram_efficiency.full = dfixed_div(dram_efficiency, a);
   8937	bandwidth.full = dfixed_mul(dram_channels, yclk);
   8938	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
   8939
   8940	return dfixed_trunc(bandwidth);
   8941}
   8942
   8943/**
   8944 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
   8945 *
   8946 * @wm: watermark calculation data
   8947 *
   8948 * Calculate the dram bandwidth used for display (CIK).
   8949 * Used for display watermark bandwidth calculations
   8950 * Returns the dram bandwidth for display in MBytes/s
   8951 */
   8952static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
   8953{
   8954	/* Calculate DRAM Bandwidth and the part allocated to display. */
   8955	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
   8956	fixed20_12 yclk, dram_channels, bandwidth;
   8957	fixed20_12 a;
   8958
   8959	a.full = dfixed_const(1000);
   8960	yclk.full = dfixed_const(wm->yclk);
   8961	yclk.full = dfixed_div(yclk, a);
   8962	dram_channels.full = dfixed_const(wm->dram_channels * 4);
   8963	a.full = dfixed_const(10);
   8964	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
   8965	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
   8966	bandwidth.full = dfixed_mul(dram_channels, yclk);
   8967	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
   8968
   8969	return dfixed_trunc(bandwidth);
   8970}
   8971
   8972/**
   8973 * dce8_data_return_bandwidth - get the data return bandwidth
   8974 *
   8975 * @wm: watermark calculation data
   8976 *
   8977 * Calculate the data return bandwidth used for display (CIK).
   8978 * Used for display watermark bandwidth calculations
   8979 * Returns the data return bandwidth in MBytes/s
   8980 */
   8981static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
   8982{
   8983	/* Calculate the display Data return Bandwidth */
   8984	fixed20_12 return_efficiency; /* 0.8 */
   8985	fixed20_12 sclk, bandwidth;
   8986	fixed20_12 a;
   8987
   8988	a.full = dfixed_const(1000);
   8989	sclk.full = dfixed_const(wm->sclk);
   8990	sclk.full = dfixed_div(sclk, a);
   8991	a.full = dfixed_const(10);
   8992	return_efficiency.full = dfixed_const(8);
   8993	return_efficiency.full = dfixed_div(return_efficiency, a);
   8994	a.full = dfixed_const(32);
   8995	bandwidth.full = dfixed_mul(a, sclk);
   8996	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
   8997
   8998	return dfixed_trunc(bandwidth);
   8999}
   9000
   9001/**
   9002 * dce8_dmif_request_bandwidth - get the dmif bandwidth
   9003 *
   9004 * @wm: watermark calculation data
   9005 *
   9006 * Calculate the dmif bandwidth used for display (CIK).
   9007 * Used for display watermark bandwidth calculations
   9008 * Returns the dmif bandwidth in MBytes/s
   9009 */
   9010static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
   9011{
   9012	/* Calculate the DMIF Request Bandwidth */
   9013	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
   9014	fixed20_12 disp_clk, bandwidth;
   9015	fixed20_12 a, b;
   9016
   9017	a.full = dfixed_const(1000);
   9018	disp_clk.full = dfixed_const(wm->disp_clk);
   9019	disp_clk.full = dfixed_div(disp_clk, a);
   9020	a.full = dfixed_const(32);
   9021	b.full = dfixed_mul(a, disp_clk);
   9022
   9023	a.full = dfixed_const(10);
   9024	disp_clk_request_efficiency.full = dfixed_const(8);
   9025	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
   9026
   9027	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
   9028
   9029	return dfixed_trunc(bandwidth);
   9030}
   9031
   9032/**
   9033 * dce8_available_bandwidth - get the min available bandwidth
   9034 *
   9035 * @wm: watermark calculation data
   9036 *
   9037 * Calculate the min available bandwidth used for display (CIK).
   9038 * Used for display watermark bandwidth calculations
   9039 * Returns the min available bandwidth in MBytes/s
   9040 */
   9041static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
   9042{
   9043	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
   9044	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
   9045	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
   9046	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
   9047
   9048	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
   9049}
   9050
   9051/**
   9052 * dce8_average_bandwidth - get the average available bandwidth
   9053 *
   9054 * @wm: watermark calculation data
   9055 *
   9056 * Calculate the average available bandwidth used for display (CIK).
   9057 * Used for display watermark bandwidth calculations
   9058 * Returns the average available bandwidth in MBytes/s
   9059 */
   9060static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
   9061{
   9062	/* Calculate the display mode Average Bandwidth
   9063	 * DisplayMode should contain the source and destination dimensions,
   9064	 * timing, etc.
   9065	 */
   9066	fixed20_12 bpp;
   9067	fixed20_12 line_time;
   9068	fixed20_12 src_width;
   9069	fixed20_12 bandwidth;
   9070	fixed20_12 a;
   9071
   9072	a.full = dfixed_const(1000);
   9073	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
   9074	line_time.full = dfixed_div(line_time, a);
   9075	bpp.full = dfixed_const(wm->bytes_per_pixel);
   9076	src_width.full = dfixed_const(wm->src_width);
   9077	bandwidth.full = dfixed_mul(src_width, bpp);
   9078	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
   9079	bandwidth.full = dfixed_div(bandwidth, line_time);
   9080
   9081	return dfixed_trunc(bandwidth);
   9082}
   9083
   9084/**
   9085 * dce8_latency_watermark - get the latency watermark
   9086 *
   9087 * @wm: watermark calculation data
   9088 *
   9089 * Calculate the latency watermark (CIK).
   9090 * Used for display watermark bandwidth calculations
   9091 * Returns the latency watermark in ns
   9092 */
   9093static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
   9094{
   9095	/* First calculate the latency in ns */
   9096	u32 mc_latency = 2000; /* 2000 ns. */
   9097	u32 available_bandwidth = dce8_available_bandwidth(wm);
   9098	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
   9099	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
   9100	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
   9101	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
   9102		(wm->num_heads * cursor_line_pair_return_time);
   9103	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
   9104	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
   9105	u32 tmp, dmif_size = 12288;
   9106	fixed20_12 a, b, c;
   9107
   9108	if (wm->num_heads == 0)
   9109		return 0;
   9110
   9111	a.full = dfixed_const(2);
   9112	b.full = dfixed_const(1);
   9113	if ((wm->vsc.full > a.full) ||
   9114	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
   9115	    (wm->vtaps >= 5) ||
   9116	    ((wm->vsc.full >= a.full) && wm->interlaced))
   9117		max_src_lines_per_dst_line = 4;
   9118	else
   9119		max_src_lines_per_dst_line = 2;
   9120
   9121	a.full = dfixed_const(available_bandwidth);
   9122	b.full = dfixed_const(wm->num_heads);
   9123	a.full = dfixed_div(a, b);
   9124	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
   9125	tmp = min(dfixed_trunc(a), tmp);
   9126
   9127	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
   9128
   9129	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
   9130	b.full = dfixed_const(1000);
   9131	c.full = dfixed_const(lb_fill_bw);
   9132	b.full = dfixed_div(c, b);
   9133	a.full = dfixed_div(a, b);
   9134	line_fill_time = dfixed_trunc(a);
   9135
   9136	if (line_fill_time < wm->active_time)
   9137		return latency;
   9138	else
   9139		return latency + (line_fill_time - wm->active_time);
   9140
   9141}
   9142
   9143/**
   9144 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
   9145 * average and available dram bandwidth
   9146 *
   9147 * @wm: watermark calculation data
   9148 *
   9149 * Check if the display average bandwidth fits in the display
   9150 * dram bandwidth (CIK).
   9151 * Used for display watermark bandwidth calculations
   9152 * Returns true if the display fits, false if not.
   9153 */
   9154static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
   9155{
   9156	if (dce8_average_bandwidth(wm) <=
   9157	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
   9158		return true;
   9159	else
   9160		return false;
   9161}
   9162
   9163/**
   9164 * dce8_average_bandwidth_vs_available_bandwidth - check
   9165 * average and available bandwidth
   9166 *
   9167 * @wm: watermark calculation data
   9168 *
   9169 * Check if the display average bandwidth fits in the display
   9170 * available bandwidth (CIK).
   9171 * Used for display watermark bandwidth calculations
   9172 * Returns true if the display fits, false if not.
   9173 */
   9174static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
   9175{
   9176	if (dce8_average_bandwidth(wm) <=
   9177	    (dce8_available_bandwidth(wm) / wm->num_heads))
   9178		return true;
   9179	else
   9180		return false;
   9181}
   9182
   9183/**
   9184 * dce8_check_latency_hiding - check latency hiding
   9185 *
   9186 * @wm: watermark calculation data
   9187 *
   9188 * Check latency hiding (CIK).
   9189 * Used for display watermark bandwidth calculations
   9190 * Returns true if the display fits, false if not.
   9191 */
   9192static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
   9193{
   9194	u32 lb_partitions = wm->lb_size / wm->src_width;
   9195	u32 line_time = wm->active_time + wm->blank_time;
   9196	u32 latency_tolerant_lines;
   9197	u32 latency_hiding;
   9198	fixed20_12 a;
   9199
   9200	a.full = dfixed_const(1);
   9201	if (wm->vsc.full > a.full)
   9202		latency_tolerant_lines = 1;
   9203	else {
   9204		if (lb_partitions <= (wm->vtaps + 1))
   9205			latency_tolerant_lines = 1;
   9206		else
   9207			latency_tolerant_lines = 2;
   9208	}
   9209
   9210	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
   9211
   9212	if (dce8_latency_watermark(wm) <= latency_hiding)
   9213		return true;
   9214	else
   9215		return false;
   9216}
   9217
   9218/**
   9219 * dce8_program_watermarks - program display watermarks
   9220 *
   9221 * @rdev: radeon_device pointer
   9222 * @radeon_crtc: the selected display controller
   9223 * @lb_size: line buffer size
   9224 * @num_heads: number of display controllers in use
   9225 *
   9226 * Calculate and program the display watermarks for the
   9227 * selected display controller (CIK).
   9228 */
   9229static void dce8_program_watermarks(struct radeon_device *rdev,
   9230				    struct radeon_crtc *radeon_crtc,
   9231				    u32 lb_size, u32 num_heads)
   9232{
   9233	struct drm_display_mode *mode = &radeon_crtc->base.mode;
   9234	struct dce8_wm_params wm_low, wm_high;
   9235	u32 active_time;
   9236	u32 line_time = 0;
   9237	u32 latency_watermark_a = 0, latency_watermark_b = 0;
   9238	u32 tmp, wm_mask;
   9239
   9240	if (radeon_crtc->base.enabled && num_heads && mode) {
   9241		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
   9242					    (u32)mode->clock);
   9243		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
   9244					  (u32)mode->clock);
   9245		line_time = min(line_time, (u32)65535);
   9246
   9247		/* watermark for high clocks */
   9248		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
   9249		    rdev->pm.dpm_enabled) {
   9250			wm_high.yclk =
   9251				radeon_dpm_get_mclk(rdev, false) * 10;
   9252			wm_high.sclk =
   9253				radeon_dpm_get_sclk(rdev, false) * 10;
   9254		} else {
   9255			wm_high.yclk = rdev->pm.current_mclk * 10;
   9256			wm_high.sclk = rdev->pm.current_sclk * 10;
   9257		}
   9258
   9259		wm_high.disp_clk = mode->clock;
   9260		wm_high.src_width = mode->crtc_hdisplay;
   9261		wm_high.active_time = active_time;
   9262		wm_high.blank_time = line_time - wm_high.active_time;
   9263		wm_high.interlaced = false;
   9264		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
   9265			wm_high.interlaced = true;
   9266		wm_high.vsc = radeon_crtc->vsc;
   9267		wm_high.vtaps = 1;
   9268		if (radeon_crtc->rmx_type != RMX_OFF)
   9269			wm_high.vtaps = 2;
   9270		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
   9271		wm_high.lb_size = lb_size;
   9272		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
   9273		wm_high.num_heads = num_heads;
   9274
   9275		/* set for high clocks */
   9276		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
   9277
   9278		/* possibly force display priority to high */
   9279		/* should really do this at mode validation time... */
   9280		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
   9281		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
   9282		    !dce8_check_latency_hiding(&wm_high) ||
   9283		    (rdev->disp_priority == 2)) {
   9284			DRM_DEBUG_KMS("force priority to high\n");
   9285		}
   9286
   9287		/* watermark for low clocks */
   9288		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
   9289		    rdev->pm.dpm_enabled) {
   9290			wm_low.yclk =
   9291				radeon_dpm_get_mclk(rdev, true) * 10;
   9292			wm_low.sclk =
   9293				radeon_dpm_get_sclk(rdev, true) * 10;
   9294		} else {
   9295			wm_low.yclk = rdev->pm.current_mclk * 10;
   9296			wm_low.sclk = rdev->pm.current_sclk * 10;
   9297		}
   9298
   9299		wm_low.disp_clk = mode->clock;
   9300		wm_low.src_width = mode->crtc_hdisplay;
   9301		wm_low.active_time = active_time;
   9302		wm_low.blank_time = line_time - wm_low.active_time;
   9303		wm_low.interlaced = false;
   9304		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
   9305			wm_low.interlaced = true;
   9306		wm_low.vsc = radeon_crtc->vsc;
   9307		wm_low.vtaps = 1;
   9308		if (radeon_crtc->rmx_type != RMX_OFF)
   9309			wm_low.vtaps = 2;
   9310		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
   9311		wm_low.lb_size = lb_size;
   9312		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
   9313		wm_low.num_heads = num_heads;
   9314
   9315		/* set for low clocks */
   9316		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
   9317
   9318		/* possibly force display priority to high */
   9319		/* should really do this at mode validation time... */
   9320		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
   9321		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
   9322		    !dce8_check_latency_hiding(&wm_low) ||
   9323		    (rdev->disp_priority == 2)) {
   9324			DRM_DEBUG_KMS("force priority to high\n");
   9325		}
   9326
   9327		/* Save number of lines the linebuffer leads before the scanout */
   9328		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
   9329	}
   9330
   9331	/* select wm A */
   9332	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
   9333	tmp = wm_mask;
   9334	tmp &= ~LATENCY_WATERMARK_MASK(3);
   9335	tmp |= LATENCY_WATERMARK_MASK(1);
   9336	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
   9337	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
   9338	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
   9339		LATENCY_HIGH_WATERMARK(line_time)));
   9340	/* select wm B */
   9341	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
   9342	tmp &= ~LATENCY_WATERMARK_MASK(3);
   9343	tmp |= LATENCY_WATERMARK_MASK(2);
   9344	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
   9345	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
   9346	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
   9347		LATENCY_HIGH_WATERMARK(line_time)));
   9348	/* restore original selection */
   9349	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
   9350
   9351	/* save values for DPM */
   9352	radeon_crtc->line_time = line_time;
   9353	radeon_crtc->wm_high = latency_watermark_a;
   9354	radeon_crtc->wm_low = latency_watermark_b;
   9355}
   9356
   9357/**
   9358 * dce8_bandwidth_update - program display watermarks
   9359 *
   9360 * @rdev: radeon_device pointer
   9361 *
   9362 * Calculate and program the display watermarks and line
   9363 * buffer allocation (CIK).
   9364 */
   9365void dce8_bandwidth_update(struct radeon_device *rdev)
   9366{
   9367	struct drm_display_mode *mode = NULL;
   9368	u32 num_heads = 0, lb_size;
   9369	int i;
   9370
   9371	if (!rdev->mode_info.mode_config_initialized)
   9372		return;
   9373
   9374	radeon_update_display_priority(rdev);
   9375
   9376	for (i = 0; i < rdev->num_crtc; i++) {
   9377		if (rdev->mode_info.crtcs[i]->base.enabled)
   9378			num_heads++;
   9379	}
   9380	for (i = 0; i < rdev->num_crtc; i++) {
   9381		mode = &rdev->mode_info.crtcs[i]->base.mode;
   9382		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
   9383		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
   9384	}
   9385}
   9386
   9387/**
   9388 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
   9389 *
   9390 * @rdev: radeon_device pointer
   9391 *
   9392 * Fetches a GPU clock counter snapshot (SI).
   9393 * Returns the 64 bit clock counter snapshot.
   9394 */
   9395uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
   9396{
   9397	uint64_t clock;
   9398
   9399	mutex_lock(&rdev->gpu_clock_mutex);
   9400	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
   9401	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
   9402		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
   9403	mutex_unlock(&rdev->gpu_clock_mutex);
   9404	return clock;
   9405}
   9406
   9407static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
   9408			     u32 cntl_reg, u32 status_reg)
   9409{
   9410	int r, i;
   9411	struct atom_clock_dividers dividers;
   9412	uint32_t tmp;
   9413
   9414	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
   9415					   clock, false, &dividers);
   9416	if (r)
   9417		return r;
   9418
   9419	tmp = RREG32_SMC(cntl_reg);
   9420	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
   9421	tmp |= dividers.post_divider;
   9422	WREG32_SMC(cntl_reg, tmp);
   9423
   9424	for (i = 0; i < 100; i++) {
   9425		if (RREG32_SMC(status_reg) & DCLK_STATUS)
   9426			break;
   9427		mdelay(10);
   9428	}
   9429	if (i == 100)
   9430		return -ETIMEDOUT;
   9431
   9432	return 0;
   9433}
   9434
   9435int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
   9436{
   9437	int r = 0;
   9438
   9439	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
   9440	if (r)
   9441		return r;
   9442
   9443	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
   9444	return r;
   9445}
   9446
/**
 * cik_set_vce_clocks - program the VCE ECLK
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk; NOTE(review): currently unused by this
 *         function — only @ecclk is programmed
 * @ecclk: requested ecclk, passed to radeon_atom_get_clock_dividers()
 *
 * Looks up the post divider for @ecclk via the atom tables, waits for
 * ECLK_STATUS before touching the divider, programs it, then waits for
 * the new clock to report ready.
 * Returns 0 on success, -ETIMEDOUT if ECLK_STATUS never asserts, or the
 * atom divider lookup error.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* wait for the current clock to be stable before reprogramming */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* install the new post divider */
	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait up to ~1s for the new clock to come up */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
   9481
/*
 * cik_pcie_gen3_enable - try to bring the PCIe link up to gen2/gen3 speed
 *
 * Reads the upstream bridge's supported link speed and, when the link
 * is currently running slower, performs the gen3 (re)equalization
 * handshake and requests a speed change through PCIE_LC_SPEED_CNTL.
 * No-op for root-bus devices, IGPs, non-PCIE parts, or when disabled
 * via radeon.pcie_gen2=0.
 *
 * NOTE(review): the LNKCTL/LNKCTL2 updates below are plain
 * read-modify-write sequences and are not atomic against concurrent
 * users of those registers; later kernels switched these to
 * pcie_capability_clear_and_set_word() — confirm before backporting.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* nothing to do unless the bridge supports gen2 or gen3 */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD state of both link partners */
			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
						  &bridge_cfg);
			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
						  &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
						   tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the widest detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop; bail early once no
			 * transactions are pending on the gpu */
			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL,
							   tmp16);

				/* linkctl2 */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (bridge_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (gpu_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* advertise the target link speed in LNKCTL2 */
	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
   9667
   9668static void cik_program_aspm(struct radeon_device *rdev)
   9669{
   9670	u32 data, orig;
   9671	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
   9672	bool disable_clkreq = false;
   9673
   9674	if (radeon_aspm == 0)
   9675		return;
   9676
   9677	/* XXX double check IGPs */
   9678	if (rdev->flags & RADEON_IS_IGP)
   9679		return;
   9680
   9681	if (!(rdev->flags & RADEON_IS_PCIE))
   9682		return;
   9683
   9684	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
   9685	data &= ~LC_XMIT_N_FTS_MASK;
   9686	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
   9687	if (orig != data)
   9688		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
   9689
   9690	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
   9691	data |= LC_GO_TO_RECOVERY;
   9692	if (orig != data)
   9693		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
   9694
   9695	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
   9696	data |= P_IGNORE_EDB_ERR;
   9697	if (orig != data)
   9698		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
   9699
   9700	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
   9701	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
   9702	data |= LC_PMI_TO_L1_DIS;
   9703	if (!disable_l0s)
   9704		data |= LC_L0S_INACTIVITY(7);
   9705
   9706	if (!disable_l1) {
   9707		data |= LC_L1_INACTIVITY(7);
   9708		data &= ~LC_PMI_TO_L1_DIS;
   9709		if (orig != data)
   9710			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
   9711
   9712		if (!disable_plloff_in_l1) {
   9713			bool clk_req_support;
   9714
   9715			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
   9716			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
   9717			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
   9718			if (orig != data)
   9719				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
   9720
   9721			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
   9722			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
   9723			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
   9724			if (orig != data)
   9725				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
   9726
   9727			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
   9728			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
   9729			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
   9730			if (orig != data)
   9731				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
   9732
   9733			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
   9734			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
   9735			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
   9736			if (orig != data)
   9737				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
   9738
   9739			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
   9740			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
   9741			data |= LC_DYN_LANES_PWR_STATE(3);
   9742			if (orig != data)
   9743				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
   9744
   9745			if (!disable_clkreq &&
   9746			    !pci_is_root_bus(rdev->pdev->bus)) {
   9747				struct pci_dev *root = rdev->pdev->bus->self;
   9748				u32 lnkcap;
   9749
   9750				clk_req_support = false;
   9751				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
   9752				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
   9753					clk_req_support = true;
   9754			} else {
   9755				clk_req_support = false;
   9756			}
   9757
   9758			if (clk_req_support) {
   9759				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
   9760				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
   9761				if (orig != data)
   9762					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
   9763
   9764				orig = data = RREG32_SMC(THM_CLK_CNTL);
   9765				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
   9766				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
   9767				if (orig != data)
   9768					WREG32_SMC(THM_CLK_CNTL, data);
   9769
   9770				orig = data = RREG32_SMC(MISC_CLK_CTRL);
   9771				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
   9772				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
   9773				if (orig != data)
   9774					WREG32_SMC(MISC_CLK_CTRL, data);
   9775
   9776				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
   9777				data &= ~BCLK_AS_XCLK;
   9778				if (orig != data)
   9779					WREG32_SMC(CG_CLKPIN_CNTL, data);
   9780
   9781				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
   9782				data &= ~FORCE_BIF_REFCLK_EN;
   9783				if (orig != data)
   9784					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
   9785
   9786				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
   9787				data &= ~MPLL_CLKOUT_SEL_MASK;
   9788				data |= MPLL_CLKOUT_SEL(4);
   9789				if (orig != data)
   9790					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
   9791			}
   9792		}
   9793	} else {
   9794		if (orig != data)
   9795			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
   9796	}
   9797
   9798	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
   9799	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
   9800	if (orig != data)
   9801		WREG32_PCIE_PORT(PCIE_CNTL2, data);
   9802
   9803	if (!disable_l0s) {
   9804		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
   9805		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
   9806			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
   9807			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
   9808				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
   9809				data &= ~LC_L0S_INACTIVITY_MASK;
   9810				if (orig != data)
   9811					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
   9812			}
   9813		}
   9814	}
   9815}