a5xx_gpu.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
a5xx_gpu.c (56350B)
      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
      3 */
      4
      5#include <linux/kernel.h>
      6#include <linux/types.h>
      7#include <linux/cpumask.h>
      8#include <linux/qcom_scm.h>
      9#include <linux/pm_opp.h>
     10#include <linux/nvmem-consumer.h>
     11#include <linux/slab.h>
     12#include "msm_gem.h"
     13#include "msm_mmu.h"
     14#include "a5xx_gpu.h"
     15
/* Flag defined outside this file. */
extern bool hang_debug;
/* Forward declaration; the definition is not in this part of the file. */
static void a5xx_dump(struct msm_gpu *gpu);

/*
 * Identifier passed to qcom_scm_set_remote_state() and
 * adreno_zap_shader_load() by the zap shader helpers in this file.
 */
#define GPU_PAS_ID 13
     20
     21static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
     22{
     23	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     24	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
     25
     26	if (a5xx_gpu->has_whereami) {
     27		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
     28		OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
     29		OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
     30	}
     31}
     32
     33void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
     34		bool sync)
     35{
     36	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     37	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
     38	uint32_t wptr;
     39	unsigned long flags;
     40
     41	/*
     42	 * Most flush operations need to issue a WHERE_AM_I opcode to sync up
     43	 * the rptr shadow
     44	 */
     45	if (sync)
     46		update_shadow_rptr(gpu, ring);
     47
     48	spin_lock_irqsave(&ring->preempt_lock, flags);
     49
     50	/* Copy the shadow to the actual register */
     51	ring->cur = ring->next;
     52
     53	/* Make sure to wrap wptr if we need to */
     54	wptr = get_wptr(ring);
     55
     56	spin_unlock_irqrestore(&ring->preempt_lock, flags);
     57
     58	/* Make sure everything is posted before making a decision */
     59	mb();
     60
     61	/* Update HW if this is the current ring and we are not in preempt */
     62	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
     63		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
     64}
     65
     66static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
     67{
     68	struct msm_ringbuffer *ring = submit->ring;
     69	struct msm_gem_object *obj;
     70	uint32_t *ptr, dwords;
     71	unsigned int i;
     72
     73	for (i = 0; i < submit->nr_cmds; i++) {
     74		switch (submit->cmd[i].type) {
     75		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
     76			break;
     77		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
     78			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
     79				break;
     80			fallthrough;
     81		case MSM_SUBMIT_CMD_BUF:
     82			/* copy commands into RB: */
     83			obj = submit->bos[submit->cmd[i].idx].obj;
     84			dwords = submit->cmd[i].size;
     85
     86			ptr = msm_gem_get_vaddr(&obj->base);
     87
     88			/* _get_vaddr() shouldn't fail at this point,
     89			 * since we've already mapped it once in
     90			 * submit_reloc()
     91			 */
     92			if (WARN_ON(!ptr))
     93				return;
     94
     95			for (i = 0; i < dwords; i++) {
     96				/* normally the OUT_PKTn() would wait
     97				 * for space for the packet.  But since
     98				 * we just OUT_RING() the whole thing,
     99				 * need to call adreno_wait_ring()
    100				 * ourself:
    101				 */
    102				adreno_wait_ring(ring, 1);
    103				OUT_RING(ring, ptr[i]);
    104			}
    105
    106			msm_gem_put_vaddr(&obj->base);
    107
    108			break;
    109		}
    110	}
    111
    112	a5xx_flush(gpu, ring, true);
    113	a5xx_preempt_trigger(gpu);
    114
    115	/* we might not necessarily have a cmd from userspace to
    116	 * trigger an event to know that submit has completed, so
    117	 * do this manually:
    118	 */
    119	a5xx_idle(gpu, ring);
    120	ring->memptrs->fence = submit->seqno;
    121	msm_gpu_retire(gpu);
    122}
    123
    124static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
    125{
    126	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    127	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
    128	struct msm_ringbuffer *ring = submit->ring;
    129	unsigned int i, ibs = 0;
    130
    131	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
    132		gpu->cur_ctx_seqno = 0;
    133		a5xx_submit_in_rb(gpu, submit);
    134		return;
    135	}
    136
    137	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
    138	OUT_RING(ring, 0x02);
    139
    140	/* Turn off protected mode to write to special registers */
    141	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
    142	OUT_RING(ring, 0);
    143
    144	/* Set the save preemption record for the ring/command */
    145	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
    146	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
    147	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
    148
    149	/* Turn back on protected mode */
    150	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
    151	OUT_RING(ring, 1);
    152
    153	/* Enable local preemption for finegrain preemption */
    154	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
    155	OUT_RING(ring, 0x02);
    156
    157	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
    158	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
    159	OUT_RING(ring, 0x02);
    160
    161	/* Submit the commands */
    162	for (i = 0; i < submit->nr_cmds; i++) {
    163		switch (submit->cmd[i].type) {
    164		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
    165			break;
    166		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
    167			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
    168				break;
    169			fallthrough;
    170		case MSM_SUBMIT_CMD_BUF:
    171			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
    172			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
    173			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
    174			OUT_RING(ring, submit->cmd[i].size);
    175			ibs++;
    176			break;
    177		}
    178
    179		/*
    180		 * Periodically update shadow-wptr if needed, so that we
    181		 * can see partial progress of submits with large # of
    182		 * cmds.. otherwise we could needlessly stall waiting for
    183		 * ringbuffer state, simply due to looking at a shadow
    184		 * rptr value that has not been updated
    185		 */
    186		if ((ibs % 32) == 0)
    187			update_shadow_rptr(gpu, ring);
    188	}
    189
    190	/*
    191	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
    192	 * are done rendering - otherwise a lucky preemption would start
    193	 * replaying from the last checkpoint
    194	 */
    195	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
    196	OUT_RING(ring, 0);
    197	OUT_RING(ring, 0);
    198	OUT_RING(ring, 0);
    199	OUT_RING(ring, 0);
    200	OUT_RING(ring, 0);
    201
    202	/* Turn off IB level preemptions */
    203	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
    204	OUT_RING(ring, 0x01);
    205
    206	/* Write the fence to the scratch register */
    207	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
    208	OUT_RING(ring, submit->seqno);
    209
    210	/*
    211	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
    212	 * timestamp is written to the memory and then triggers the interrupt
    213	 */
    214	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
    215	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
    216		CP_EVENT_WRITE_0_IRQ);
    217	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
    218	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
    219	OUT_RING(ring, submit->seqno);
    220
    221	/* Yield the floor on command completion */
    222	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
    223	/*
    224	 * If dword[2:1] are non zero, they specify an address for the CP to
    225	 * write the value of dword[3] to on preemption complete. Write 0 to
    226	 * skip the write
    227	 */
    228	OUT_RING(ring, 0x00);
    229	OUT_RING(ring, 0x00);
    230	/* Data value - not used if the address above is 0 */
    231	OUT_RING(ring, 0x01);
    232	/* Set bit 0 to trigger an interrupt on preempt complete */
    233	OUT_RING(ring, 0x01);
    234
    235	/* A WHERE_AM_I packet is not needed after a YIELD */
    236	a5xx_flush(gpu, ring, false);
    237
    238	/* Check to see if we need to start preemption */
    239	a5xx_preempt_trigger(gpu);
    240}
    241
    242static const struct adreno_five_hwcg_regs {
    243	u32 offset;
    244	u32 value;
    245} a5xx_hwcg[] = {
    246	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
    247	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
    248	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
    249	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
    250	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
    251	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
    252	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
    253	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
    254	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
    255	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
    256	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
    257	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
    258	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    259	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
    260	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
    261	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
    262	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
    263	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
    264	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
    265	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
    266	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    267	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
    268	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
    269	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
    270	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
    271	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
    272	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
    273	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
    274	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    275	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
    276	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
    277	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
    278	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    279	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
    280	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
    281	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
    282	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
    283	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
    284	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
    285	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
    286	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    287	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
    288	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
    289	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
    290	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    291	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
    292	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
    293	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
    294	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
    295	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
    296	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
    297	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
    298	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    299	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
    300	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
    301	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
    302	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
    303	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    304	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    305	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
    306	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
    307	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
    308	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
    309	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
    310	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
    311	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
    312	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
    313	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
    314	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
    315	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
    316	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
    317	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
    318	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
    319	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
    320	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
    321	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
    322	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
    323	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
    324	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
    325	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
    326	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
    327	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
    328	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    329	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
    330	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    331	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    332	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    333	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    334	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    335	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    336	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    337	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
    338}, a50x_hwcg[] = {
    339	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
    340	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
    341	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
    342	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    343	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
    344	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    345	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
    346	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    347	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    348	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
    349	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    350	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    351	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
    352	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
    353	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
    354	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
    355	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    356	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
    357	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    358	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    359	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
    360	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
    361	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
    362	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
    363	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
    364	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
    365	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
    366	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
    367	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    368	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
    369	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    370	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    371	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    372	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    373	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    374	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    375	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    376	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
    377}, a512_hwcg[] = {
    378	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
    379	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
    380	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
    381	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
    382	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
    383	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
    384	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    385	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
    386	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
    387	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
    388	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    389	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
    390	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
    391	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
    392	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    393	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
    394	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    395	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
    396	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
    397	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
    398	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    399	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
    400	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    401	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
    402	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
    403	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
    404	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    405	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
    406	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
    407	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
    408	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
    409	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    410	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    411	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
    412	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
    413	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
    414	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
    415	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
    416	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
    417	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
    418	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
    419	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
    420	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
    421	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
    422	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
    423	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
    424	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    425	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
    426	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    427	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    428	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    429	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    430	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    431	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    432	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    433	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
    434};
    435
    436void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
    437{
    438	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    439	const struct adreno_five_hwcg_regs *regs;
    440	unsigned int i, sz;
    441
    442	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) {
    443		regs = a50x_hwcg;
    444		sz = ARRAY_SIZE(a50x_hwcg);
    445	} else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
    446		regs = a512_hwcg;
    447		sz = ARRAY_SIZE(a512_hwcg);
    448	} else {
    449		regs = a5xx_hwcg;
    450		sz = ARRAY_SIZE(a5xx_hwcg);
    451	}
    452
    453	for (i = 0; i < sz; i++)
    454		gpu_write(gpu, regs[i].offset,
    455			  state ? regs[i].value : 0);
    456
    457	if (adreno_is_a540(adreno_gpu)) {
    458		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
    459		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
    460	}
    461
    462	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
    463	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
    464}
    465
    466static int a5xx_me_init(struct msm_gpu *gpu)
    467{
    468	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    469	struct msm_ringbuffer *ring = gpu->rb[0];
    470
    471	OUT_PKT7(ring, CP_ME_INIT, 8);
    472
    473	OUT_RING(ring, 0x0000002F);
    474
    475	/* Enable multiple hardware contexts */
    476	OUT_RING(ring, 0x00000003);
    477
    478	/* Enable error detection */
    479	OUT_RING(ring, 0x20000000);
    480
    481	/* Don't enable header dump */
    482	OUT_RING(ring, 0x00000000);
    483	OUT_RING(ring, 0x00000000);
    484
    485	/* Specify workarounds for various microcode issues */
    486	if (adreno_is_a506(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
    487		/* Workaround for token end syncs
    488		 * Force a WFI after every direct-render 3D mode draw and every
    489		 * 2D mode 3 draw
    490		 */
    491		OUT_RING(ring, 0x0000000B);
    492	} else if (adreno_is_a510(adreno_gpu)) {
    493		/* Workaround for token and syncs */
    494		OUT_RING(ring, 0x00000001);
    495	} else {
    496		/* No workarounds enabled */
    497		OUT_RING(ring, 0x00000000);
    498	}
    499
    500	OUT_RING(ring, 0x00000000);
    501	OUT_RING(ring, 0x00000000);
    502
    503	a5xx_flush(gpu, ring, true);
    504	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
    505}
    506
    507static int a5xx_preempt_start(struct msm_gpu *gpu)
    508{
    509	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    510	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
    511	struct msm_ringbuffer *ring = gpu->rb[0];
    512
    513	if (gpu->nr_rings == 1)
    514		return 0;
    515
    516	/* Turn off protected mode to write to special registers */
    517	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
    518	OUT_RING(ring, 0);
    519
    520	/* Set the save preemption record for the ring/command */
    521	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
    522	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
    523	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
    524
    525	/* Turn back on protected mode */
    526	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
    527	OUT_RING(ring, 1);
    528
    529	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
    530	OUT_RING(ring, 0x00);
    531
    532	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
    533	OUT_RING(ring, 0x01);
    534
    535	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
    536	OUT_RING(ring, 0x01);
    537
    538	/* Yield the floor on command completion */
    539	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
    540	OUT_RING(ring, 0x00);
    541	OUT_RING(ring, 0x00);
    542	OUT_RING(ring, 0x01);
    543	OUT_RING(ring, 0x01);
    544
    545	/* The WHERE_AMI_I packet is not needed after a YIELD is issued */
    546	a5xx_flush(gpu, ring, false);
    547
    548	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
    549}
    550
    551static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
    552		struct drm_gem_object *obj)
    553{
    554	u32 *buf = msm_gem_get_vaddr(obj);
    555
    556	if (IS_ERR(buf))
    557		return;
    558
    559	/*
    560	 * If the lowest nibble is 0xa that is an indication that this microcode
    561	 * has been patched. The actual version is in dword [3] but we only care
    562	 * about the patchlevel which is the lowest nibble of dword [3]
    563	 */
    564	if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
    565		a5xx_gpu->has_whereami = true;
    566
    567	msm_gem_put_vaddr(obj);
    568}
    569
    570static int a5xx_ucode_init(struct msm_gpu *gpu)
    571{
    572	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    573	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
    574	int ret;
    575
    576	if (!a5xx_gpu->pm4_bo) {
    577		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
    578			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
    579
    580
    581		if (IS_ERR(a5xx_gpu->pm4_bo)) {
    582			ret = PTR_ERR(a5xx_gpu->pm4_bo);
    583			a5xx_gpu->pm4_bo = NULL;
    584			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
    585				ret);
    586			return ret;
    587		}
    588
    589		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
    590	}
    591
    592	if (!a5xx_gpu->pfp_bo) {
    593		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
    594			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
    595
    596		if (IS_ERR(a5xx_gpu->pfp_bo)) {
    597			ret = PTR_ERR(a5xx_gpu->pfp_bo);
    598			a5xx_gpu->pfp_bo = NULL;
    599			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
    600				ret);
    601			return ret;
    602		}
    603
    604		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
    605		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
    606	}
    607
    608	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
    609		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
    610
    611	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
    612		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
    613
    614	return 0;
    615}
    616
    617#define SCM_GPU_ZAP_SHADER_RESUME 0
    618
    619static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
    620{
    621	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    622	int ret;
    623
    624	/*
    625	 * Adreno 506 have CPZ Retention feature and doesn't require
    626	 * to resume zap shader
    627	 */
    628	if (adreno_is_a506(adreno_gpu))
    629		return 0;
    630
    631	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
    632	if (ret)
    633		DRM_ERROR("%s: zap-shader resume failed: %d\n",
    634			gpu->name, ret);
    635
    636	return ret;
    637}
    638
    639static int a5xx_zap_shader_init(struct msm_gpu *gpu)
    640{
    641	static bool loaded;
    642	int ret;
    643
    644	/*
    645	 * If the zap shader is already loaded into memory we just need to kick
    646	 * the remote processor to reinitialize it
    647	 */
    648	if (loaded)
    649		return a5xx_zap_shader_resume(gpu);
    650
    651	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
    652
    653	loaded = !ret;
    654	return ret;
    655}
    656
    657#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
    658	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
    659	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
    660	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
    661	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
    662	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
    663	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
    664	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
    665	  A5XX_RBBM_INT_0_MASK_CP_SW | \
    666	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
    667	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
    668	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
    669
    670static int a5xx_hw_init(struct msm_gpu *gpu)
    671{
    672	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    673	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
    674	u32 regbit;
    675	int ret;
    676
    677	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
    678
    679	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
    680	    adreno_is_a540(adreno_gpu))
    681		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
    682
    683	/* Make all blocks contribute to the GPU BUSY perf counter */
    684	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
    685
    686	/* Enable RBBM error reporting bits */
    687	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
    688
    689	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
    690		/*
    691		 * Mask out the activity signals from RB1-3 to avoid false
    692		 * positives
    693		 */
    694
    695		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
    696			0xF0000000);
    697		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
    698			0xFFFFFFFF);
    699		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
    700			0xFFFFFFFF);
    701		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
    702			0xFFFFFFFF);
    703		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
    704			0xFFFFFFFF);
    705		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
    706			0xFFFFFFFF);
    707		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
    708			0xFFFFFFFF);
    709		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
    710			0xFFFFFFFF);
    711	}
    712
    713	/* Enable fault detection */
    714	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
    715		(1 << 30) | 0xFFFF);
    716
    717	/* Turn on performance counters */
    718	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
    719
    720	/* Select CP0 to always count cycles */
    721	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
    722
    723	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
    724	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
    725
    726	/* Increase VFD cache access so LRZ and other data gets evicted less */
    727	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
    728
    729	/* Disable L2 bypass in the UCHE */
    730	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
    731	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
    732	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
    733	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
    734
    735	/* Set the GMEM VA range (0 to gpu->gmem) */
    736	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
    737	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
    738	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
    739		0x00100000 + adreno_gpu->gmem - 1);
    740	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
    741
    742	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
    743	    adreno_is_a510(adreno_gpu)) {
    744		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
    745		if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
    746			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
    747		else
    748			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
    749		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
    750		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
    751	} else {
    752		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
    753		if (adreno_is_a530(adreno_gpu))
    754			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
    755		else
    756			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
    757		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
    758		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
    759	}
    760
    761	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
    762		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
    763			  (0x100 << 11 | 0x100 << 22));
    764	else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
    765		 adreno_is_a512(adreno_gpu))
    766		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
    767			  (0x200 << 11 | 0x200 << 22));
    768	else
    769		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
    770			  (0x400 << 11 | 0x300 << 22));
    771
    772	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
    773		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
    774
    775	/*
    776	 * Disable the RB sampler datapath DP2 clock gating optimization
    777	 * for 1-SP GPUs, as it is enabled by default.
    778	 */
    779	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
    780	    adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu))
    781		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
    782
    783	/* Disable UCHE global filter as SP can invalidate/flush independently */
    784	gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
    785
    786	/* Enable USE_RETENTION_FLOPS */
    787	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
    788
    789	/* Enable ME/PFP split notification */
    790	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
    791
    792	/*
    793	 *  In A5x, CCU can send context_done event of a particular context to
    794	 *  UCHE which ultimately reaches CP even when there is valid
    795	 *  transaction of that context inside CCU. This can let CP to program
    796	 *  config registers, which will make the "valid transaction" inside
    797	 *  CCU to be interpreted differently. This can cause gpu fault. This
    798	 *  bug is fixed in latest A510 revision. To enable this bug fix -
    799	 *  bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
    800	 *  (disable). For older A510 version this bit is unused.
    801	 */
    802	if (adreno_is_a510(adreno_gpu))
    803		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
    804
    805	/* Enable HWCG */
    806	a5xx_set_hwcg(gpu, true);
    807
    808	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
    809
    810	/* Set the highest bank bit */
    811	if (adreno_is_a540(adreno_gpu))
    812		regbit = 2;
    813	else
    814		regbit = 1;
    815
    816	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
    817	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
    818
    819	if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
    820	    adreno_is_a540(adreno_gpu))
    821		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
    822
    823	/* Disable All flat shading optimization (ALLFLATOPTDIS) */
    824	gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
    825
    826	/* Protect registers from the CP */
    827	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
    828
    829	/* RBBM */
    830	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
    831	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
    832	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
    833	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
    834	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
    835	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
    836
    837	/* Content protect */
    838	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
    839		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
    840			16));
    841	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
    842		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
    843
    844	/* CP */
    845	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
    846	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
    847	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
    848	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
    849
    850	/* RB */
    851	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
    852	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
    853
    854	/* VPC */
    855	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
    856	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
    857
    858	/* UCHE */
    859	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
    860
    861	/* SMMU */
    862	gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
    863			ADRENO_PROTECT_RW(0x10000, 0x8000));
    864
    865	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
    866	/*
    867	 * Disable the trusted memory range - we don't actually support secure
    868	 * memory rendering at this point in time and we don't want to block off
    869	 * part of the virtual memory space.
    870	 */
    871	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
    872		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
    873	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
    874
    875	/* Put the GPU into 64 bit by default */
    876	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
    877	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
    878	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
    879	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
    880	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
    881	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
    882	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
    883	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
    884	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
    885	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
    886	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
    887	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
    888
    889	/*
    890	 * VPC corner case with local memory load kill leads to corrupt
    891	 * internal state. Normal Disable does not work for all a5x chips.
    892	 * So do the following setting to disable it.
    893	 */
    894	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
    895		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
    896		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
    897	}
    898
    899	ret = adreno_hw_init(gpu);
    900	if (ret)
    901		return ret;
    902
    903	if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))
    904		a5xx_gpmu_ucode_init(gpu);
    905
    906	ret = a5xx_ucode_init(gpu);
    907	if (ret)
    908		return ret;
    909
    910	/* Set the ringbuffer address */
    911	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
    912		gpu->rb[0]->iova);
    913
    914	/*
    915	 * If the microcode supports the WHERE_AM_I opcode then we can use that
    916	 * in lieu of the RPTR shadow and enable preemption. Otherwise, we
    917	 * can't safely use the RPTR shadow or preemption. In either case, the
    918	 * RPTR shadow should be disabled in hardware.
    919	 */
    920	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
    921		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
    922
    923	/* Create a privileged buffer for the RPTR shadow */
    924	if (a5xx_gpu->has_whereami) {
    925		if (!a5xx_gpu->shadow_bo) {
    926			a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
    927				sizeof(u32) * gpu->nr_rings,
    928				MSM_BO_WC | MSM_BO_MAP_PRIV,
    929				gpu->aspace, &a5xx_gpu->shadow_bo,
    930				&a5xx_gpu->shadow_iova);
    931
    932			if (IS_ERR(a5xx_gpu->shadow))
    933				return PTR_ERR(a5xx_gpu->shadow);
    934
    935			msm_gem_object_set_name(a5xx_gpu->shadow_bo, "shadow");
    936		}
    937
    938		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
    939			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
    940	} else if (gpu->nr_rings > 1) {
    941		/* Disable preemption if WHERE_AM_I isn't available */
    942		a5xx_preempt_fini(gpu);
    943		gpu->nr_rings = 1;
    944	}
    945
    946	a5xx_preempt_hw_init(gpu);
    947
    948	/* Disable the interrupts through the initial bringup stage */
    949	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
    950
    951	/* Clear ME_HALT to start the micro engine */
    952	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
    953	ret = a5xx_me_init(gpu);
    954	if (ret)
    955		return ret;
    956
    957	ret = a5xx_power_init(gpu);
    958	if (ret)
    959		return ret;
    960
    961	/*
    962	 * Send a pipeline event stat to get misbehaving counters to start
    963	 * ticking correctly
    964	 */
    965	if (adreno_is_a530(adreno_gpu)) {
    966		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
    967		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
    968
    969		a5xx_flush(gpu, gpu->rb[0], true);
    970		if (!a5xx_idle(gpu, gpu->rb[0]))
    971			return -EINVAL;
    972	}
    973
    974	/*
    975	 * If the chip that we are using does support loading one, then
    976	 * try to load a zap shader into the secure world. If successful
    977	 * we can use the CP to switch out of secure mode. If not then we
    978	 * have no recourse but to try to switch ourselves out manually. If we
    979	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
    980	 * be blocked and a permissions violation will soon follow.
    981	 */
    982	ret = a5xx_zap_shader_init(gpu);
    983	if (!ret) {
    984		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
    985		OUT_RING(gpu->rb[0], 0x00000000);
    986
    987		a5xx_flush(gpu, gpu->rb[0], true);
    988		if (!a5xx_idle(gpu, gpu->rb[0]))
    989			return -EINVAL;
    990	} else if (ret == -ENODEV) {
    991		/*
    992		 * This device does not use zap shader (but print a warning
    993		 * just in case someone got their dt wrong.. hopefully they
    994		 * have a debug UART to realize the error of their ways...
    995		 * if you mess this up you are about to crash horribly)
    996		 */
    997		dev_warn_once(gpu->dev->dev,
    998			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
    999		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
   1000	} else {
   1001		return ret;
   1002	}
   1003
   1004	/* Last step - yield the ringbuffer */
   1005	a5xx_preempt_start(gpu);
   1006
   1007	return 0;
   1008}
   1009
   1010static void a5xx_recover(struct msm_gpu *gpu)
   1011{
   1012	int i;
   1013
   1014	adreno_dump_info(gpu);
   1015
   1016	for (i = 0; i < 8; i++) {
   1017		printk("CP_SCRATCH_REG%d: %u\n", i,
   1018			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
   1019	}
   1020
   1021	if (hang_debug)
   1022		a5xx_dump(gpu);
   1023
   1024	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
   1025	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
   1026	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
   1027	adreno_recover(gpu);
   1028}
   1029
   1030static void a5xx_destroy(struct msm_gpu *gpu)
   1031{
   1032	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1033	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
   1034
   1035	DBG("%s", gpu->name);
   1036
   1037	a5xx_preempt_fini(gpu);
   1038
   1039	if (a5xx_gpu->pm4_bo) {
   1040		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
   1041		drm_gem_object_put(a5xx_gpu->pm4_bo);
   1042	}
   1043
   1044	if (a5xx_gpu->pfp_bo) {
   1045		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
   1046		drm_gem_object_put(a5xx_gpu->pfp_bo);
   1047	}
   1048
   1049	if (a5xx_gpu->gpmu_bo) {
   1050		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
   1051		drm_gem_object_put(a5xx_gpu->gpmu_bo);
   1052	}
   1053
   1054	if (a5xx_gpu->shadow_bo) {
   1055		msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
   1056		drm_gem_object_put(a5xx_gpu->shadow_bo);
   1057	}
   1058
   1059	adreno_gpu_cleanup(adreno_gpu);
   1060	kfree(a5xx_gpu);
   1061}
   1062
   1063static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
   1064{
   1065	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
   1066		return false;
   1067
   1068	/*
   1069	 * Nearly every abnormality ends up pausing the GPU and triggering a
   1070	 * fault so we can safely just watch for this one interrupt to fire
   1071	 */
   1072	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
   1073		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
   1074}
   1075
   1076bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
   1077{
   1078	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1079	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
   1080
   1081	if (ring != a5xx_gpu->cur_ring) {
   1082		WARN(1, "Tried to idle a non-current ringbuffer\n");
   1083		return false;
   1084	}
   1085
   1086	/* wait for CP to drain ringbuffer: */
   1087	if (!adreno_idle(gpu, ring))
   1088		return false;
   1089
   1090	if (spin_until(_a5xx_check_idle(gpu))) {
   1091		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
   1092			gpu->name, __builtin_return_address(0),
   1093			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
   1094			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
   1095			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
   1096			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
   1097		return false;
   1098	}
   1099
   1100	return true;
   1101}
   1102
   1103static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
   1104{
   1105	struct msm_gpu *gpu = arg;
   1106	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
   1107			iova, flags,
   1108			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
   1109			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
   1110			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
   1111			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
   1112
   1113	return 0;
   1114}
   1115
   1116static void a5xx_cp_err_irq(struct msm_gpu *gpu)
   1117{
   1118	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
   1119
   1120	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
   1121		u32 val;
   1122
   1123		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
   1124
   1125		/*
   1126		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
   1127		 * read it twice
   1128		 */
   1129
   1130		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
   1131		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
   1132
   1133		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
   1134			val);
   1135	}
   1136
   1137	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
   1138		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
   1139			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
   1140
   1141	if (status & A5XX_CP_INT_CP_DMA_ERROR)
   1142		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
   1143
   1144	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
   1145		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
   1146
   1147		dev_err_ratelimited(gpu->dev->dev,
   1148			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
   1149			val & (1 << 24) ? "WRITE" : "READ",
   1150			(val & 0xFFFFF) >> 2, val);
   1151	}
   1152
   1153	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
   1154		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
   1155		const char *access[16] = { "reserved", "reserved",
   1156			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
   1157			"", "", "me read", "me write", "", "", "crashdump read",
   1158			"crashdump write" };
   1159
   1160		dev_err_ratelimited(gpu->dev->dev,
   1161			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
   1162			status & 0xFFFFF, access[(status >> 24) & 0xF],
   1163			(status & (1 << 31)), status);
   1164	}
   1165}
   1166
   1167static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
   1168{
   1169	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
   1170		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
   1171
   1172		dev_err_ratelimited(gpu->dev->dev,
   1173			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
   1174			val & (1 << 28) ? "WRITE" : "READ",
   1175			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
   1176			(val >> 24) & 0xF);
   1177
   1178		/* Clear the error */
   1179		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
   1180
   1181		/* Clear the interrupt */
   1182		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
   1183			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
   1184	}
   1185
   1186	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
   1187		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
   1188
   1189	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
   1190		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
   1191			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
   1192
   1193	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
   1194		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
   1195			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
   1196
   1197	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
   1198		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
   1199			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
   1200
   1201	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
   1202		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
   1203
   1204	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
   1205		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
   1206}
   1207
   1208static void a5xx_uche_err_irq(struct msm_gpu *gpu)
   1209{
   1210	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
   1211
   1212	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
   1213
   1214	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
   1215		addr);
   1216}
   1217
   1218static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
   1219{
   1220	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
   1221}
   1222
   1223static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
   1224{
   1225	struct drm_device *dev = gpu->dev;
   1226	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
   1227
   1228	/*
   1229	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
   1230	 * but the fault handler will trigger the devcore dump, and we want
   1231	 * to otherwise resume normally rather than killing the submit, so
   1232	 * just bail.
   1233	 */
   1234	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
   1235		return;
   1236
   1237	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
   1238		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
   1239		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
   1240		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
   1241		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
   1242		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
   1243		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
   1244		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
   1245		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
   1246
   1247	/* Turn off the hangcheck timer to keep it from bothering us */
   1248	del_timer(&gpu->hangcheck_timer);
   1249
   1250	kthread_queue_work(gpu->worker, &gpu->recover_work);
   1251}
   1252
/*
 * RBBM interrupt bits that make a5xx_irq() call a5xx_rbbm_err_irq().
 *
 * NOTE(review): a5xx_rbbm_err_irq() also tests
 * A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW, which is not part of this
 * mask, so that message is only reachable when another bit listed here is
 * set at the same time - confirm whether the omission is intentional.
 */
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
   1260
   1261static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
   1262{
   1263	struct msm_drm_private *priv = gpu->dev->dev_private;
   1264	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
   1265
   1266	/*
   1267	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
   1268	 * before the source is cleared the interrupt will storm.
   1269	 */
   1270	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
   1271		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
   1272
   1273	if (priv->disable_err_irq) {
   1274		status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
   1275			  A5XX_RBBM_INT_0_MASK_CP_SW;
   1276	}
   1277
   1278	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
   1279	if (status & RBBM_ERROR_MASK)
   1280		a5xx_rbbm_err_irq(gpu, status);
   1281
   1282	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
   1283		a5xx_cp_err_irq(gpu);
   1284
   1285	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
   1286		a5xx_fault_detect_irq(gpu);
   1287
   1288	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
   1289		a5xx_uche_err_irq(gpu);
   1290
   1291	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
   1292		a5xx_gpmu_err_irq(gpu);
   1293
   1294	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
   1295		a5xx_preempt_trigger(gpu);
   1296		msm_gpu_retire(gpu);
   1297	}
   1298
   1299	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
   1300		a5xx_preempt_irq(gpu);
   1301
   1302	return IRQ_HANDLED;
   1303}
   1304
/*
 * Register offsets of interest for a5xx state dumps, terminated by a ~0
 * sentinel.
 *
 * NOTE(review): the entries read as inclusive (first, last) offset pairs,
 * which is presumably the layout the shared adreno dump code walks - the
 * consumer is not visible in this part of the file, so confirm there before
 * adding or reordering entries.
 */
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};
   1335
   1336static void a5xx_dump(struct msm_gpu *gpu)
   1337{
   1338	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
   1339		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
   1340	adreno_dump(gpu);
   1341}
   1342
   1343static int a5xx_pm_resume(struct msm_gpu *gpu)
   1344{
   1345	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1346	int ret;
   1347
   1348	/* Turn on the core power */
   1349	ret = msm_gpu_pm_resume(gpu);
   1350	if (ret)
   1351		return ret;
   1352
   1353	/* Adreno 506, 508, 509, 510, 512 needs manual RBBM sus/res control */
   1354	if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
   1355		/* Halt the sp_input_clk at HM level */
   1356		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
   1357		a5xx_set_hwcg(gpu, true);
   1358		/* Turn on sp_input_clk at HM level */
   1359		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
   1360		return 0;
   1361	}
   1362
   1363	/* Turn the RBCCU domain first to limit the chances of voltage droop */
   1364	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
   1365
   1366	/* Wait 3 usecs before polling */
   1367	udelay(3);
   1368
   1369	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
   1370		(1 << 20), (1 << 20));
   1371	if (ret) {
   1372		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
   1373			gpu->name,
   1374			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
   1375		return ret;
   1376	}
   1377
   1378	/* Turn on the SP domain */
   1379	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
   1380	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
   1381		(1 << 20), (1 << 20));
   1382	if (ret)
   1383		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
   1384			gpu->name);
   1385
   1386	return ret;
   1387}
   1388
   1389static int a5xx_pm_suspend(struct msm_gpu *gpu)
   1390{
   1391	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1392	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
   1393	u32 mask = 0xf;
   1394	int i, ret;
   1395
   1396	/* A506, A508, A510 have 3 XIN ports in VBIF */
   1397	if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
   1398	    adreno_is_a510(adreno_gpu))
   1399		mask = 0x7;
   1400
   1401	/* Clear the VBIF pipe before shutting down */
   1402	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
   1403	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
   1404				mask) == mask);
   1405
   1406	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
   1407
   1408	/*
   1409	 * Reset the VBIF before power collapse to avoid issue with FIFO
   1410	 * entries on Adreno A510 and A530 (the others will tend to lock up)
   1411	 */
   1412	if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
   1413		gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
   1414		gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
   1415	}
   1416
   1417	ret = msm_gpu_pm_suspend(gpu);
   1418	if (ret)
   1419		return ret;
   1420
   1421	if (a5xx_gpu->has_whereami)
   1422		for (i = 0; i < gpu->nr_rings; i++)
   1423			a5xx_gpu->shadow[i] = 0;
   1424
   1425	return 0;
   1426}
   1427
   1428static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
   1429{
   1430	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
   1431		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
   1432
   1433	return 0;
   1434}
   1435
/*
 * Buffer used to drive the CP crashdumper; all three fields are filled in
 * by a5xx_crashdumper_init() from a single msm_gem_kernel_new() call.
 */
struct a5xx_crashdumper {
	/* Kernel mapping of the buffer (an ERR_PTR if allocation failed) */
	void *ptr;
	/* GEM object backing the buffer */
	struct drm_gem_object *bo;
	/* GPU address of the buffer, programmed as the crash script base */
	u64 iova;
};
   1441
/* a5xx GPU state snapshot: the generic msm state plus the HLSQ registers. */
struct a5xx_gpu_state {
	/* Generic state; callers hold a pointer to this member */
	struct msm_gpu_state base;
	/*
	 * Values read through the HLSQ aperture, allocated by
	 * a5xx_gpu_state_get_hlsq_regs() and freed by
	 * a5xx_gpu_state_destroy(); NULL when they were not captured.
	 */
	u32 *hlsqregs;
};
   1446
   1447static int a5xx_crashdumper_init(struct msm_gpu *gpu,
   1448		struct a5xx_crashdumper *dumper)
   1449{
   1450	dumper->ptr = msm_gem_kernel_new(gpu->dev,
   1451		SZ_1M, MSM_BO_WC, gpu->aspace,
   1452		&dumper->bo, &dumper->iova);
   1453
   1454	if (!IS_ERR(dumper->ptr))
   1455		msm_gem_object_set_name(dumper->bo, "crashdump");
   1456
   1457	return PTR_ERR_OR_ZERO(dumper->ptr);
   1458}
   1459
   1460static int a5xx_crashdumper_run(struct msm_gpu *gpu,
   1461		struct a5xx_crashdumper *dumper)
   1462{
   1463	u32 val;
   1464
   1465	if (IS_ERR_OR_NULL(dumper->ptr))
   1466		return -EINVAL;
   1467
   1468	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
   1469		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
   1470
   1471	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
   1472
   1473	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
   1474		val & 0x04, 100, 10000);
   1475}
   1476
/*
 * This is the list of register banks that have to be read through the HLSQ
 * aperture with the help of the crashdumper. They are not nominally
 * accessible from the CPU on a secure platform.
 *
 * Per entry:
 *   type      - bank selector, written shifted left by 8 to
 *               HLSQ_DBG_READ_SEL by the crashdump script
 *   regoffset - offset of the first register in the bank, used by
 *               a5xx_show() when printing the captured values
 *   count     - number of 32-bit registers read from the bank
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
   1503
   1504static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
   1505		struct a5xx_gpu_state *a5xx_state)
   1506{
   1507	struct a5xx_crashdumper dumper = { 0 };
   1508	u32 offset, count = 0;
   1509	u64 *ptr;
   1510	int i;
   1511
   1512	if (a5xx_crashdumper_init(gpu, &dumper))
   1513		return;
   1514
   1515	/* The script will be written at offset 0 */
   1516	ptr = dumper.ptr;
   1517
   1518	/* Start writing the data at offset 256k */
   1519	offset = dumper.iova + (256 * SZ_1K);
   1520
   1521	/* Count how many additional registers to get from the HLSQ aperture */
   1522	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
   1523		count += a5xx_hlsq_aperture_regs[i].count;
   1524
   1525	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
   1526	if (!a5xx_state->hlsqregs)
   1527		return;
   1528
   1529	/* Build the crashdump script */
   1530	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
   1531		u32 type = a5xx_hlsq_aperture_regs[i].type;
   1532		u32 c = a5xx_hlsq_aperture_regs[i].count;
   1533
   1534		/* Write the register to select the desired bank */
   1535		*ptr++ = ((u64) type << 8);
   1536		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
   1537			(1 << 21) | 1;
   1538
   1539		*ptr++ = offset;
   1540		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
   1541			| c;
   1542
   1543		offset += c * sizeof(u32);
   1544	}
   1545
   1546	/* Write two zeros to close off the script */
   1547	*ptr++ = 0;
   1548	*ptr++ = 0;
   1549
   1550	if (a5xx_crashdumper_run(gpu, &dumper)) {
   1551		kfree(a5xx_state->hlsqregs);
   1552		msm_gem_kernel_put(dumper.bo, gpu->aspace);
   1553		return;
   1554	}
   1555
   1556	/* Copy the data from the crashdumper to the state */
   1557	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
   1558		count * sizeof(u32));
   1559
   1560	msm_gem_kernel_put(dumper.bo, gpu->aspace);
   1561}
   1562
   1563static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
   1564{
   1565	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
   1566			GFP_KERNEL);
   1567	bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
   1568
   1569	if (!a5xx_state)
   1570		return ERR_PTR(-ENOMEM);
   1571
   1572	/* Temporarily disable hardware clock gating before reading the hw */
   1573	a5xx_set_hwcg(gpu, false);
   1574
   1575	/* First get the generic state from the adreno core */
   1576	adreno_gpu_state_get(gpu, &(a5xx_state->base));
   1577
   1578	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
   1579
   1580	/*
   1581	 * Get the HLSQ regs with the help of the crashdumper, but only if
   1582	 * we are not stalled in an iommu fault (in which case the crashdumper
   1583	 * would not have access to memory)
   1584	 */
   1585	if (!stalled)
   1586		a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
   1587
   1588	a5xx_set_hwcg(gpu, true);
   1589
   1590	return &a5xx_state->base;
   1591}
   1592
   1593static void a5xx_gpu_state_destroy(struct kref *kref)
   1594{
   1595	struct msm_gpu_state *state = container_of(kref,
   1596		struct msm_gpu_state, ref);
   1597	struct a5xx_gpu_state *a5xx_state = container_of(state,
   1598		struct a5xx_gpu_state, base);
   1599
   1600	kfree(a5xx_state->hlsqregs);
   1601
   1602	adreno_gpu_state_destroy(state);
   1603	kfree(a5xx_state);
   1604}
   1605
   1606static int a5xx_gpu_state_put(struct msm_gpu_state *state)
   1607{
   1608	if (IS_ERR_OR_NULL(state))
   1609		return 1;
   1610
   1611	return kref_put(&state->ref, a5xx_gpu_state_destroy);
   1612}
   1613
   1614
   1615#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
   1616static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
   1617		      struct drm_printer *p)
   1618{
   1619	int i, j;
   1620	u32 pos = 0;
   1621	struct a5xx_gpu_state *a5xx_state = container_of(state,
   1622		struct a5xx_gpu_state, base);
   1623
   1624	if (IS_ERR_OR_NULL(state))
   1625		return;
   1626
   1627	adreno_show(gpu, state, p);
   1628
   1629	/* Dump the additional a5xx HLSQ registers */
   1630	if (!a5xx_state->hlsqregs)
   1631		return;
   1632
   1633	drm_printf(p, "registers-hlsq:\n");
   1634
   1635	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
   1636		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
   1637		u32 c = a5xx_hlsq_aperture_regs[i].count;
   1638
   1639		for (j = 0; j < c; j++, pos++, o++) {
   1640			/*
   1641			 * To keep the crashdump simple we pull the entire range
   1642			 * for each register type but not all of the registers
   1643			 * in the range are valid. Fortunately invalid registers
   1644			 * stick out like a sore thumb with a value of
   1645			 * 0xdeadbeef
   1646			 */
   1647			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
   1648				continue;
   1649
   1650			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
   1651				o << 2, a5xx_state->hlsqregs[pos]);
   1652		}
   1653	}
   1654}
   1655#endif
   1656
   1657static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
   1658{
   1659	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1660	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
   1661
   1662	return a5xx_gpu->cur_ring;
   1663}
   1664
   1665static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
   1666{
   1667	u64 busy_cycles;
   1668
   1669	/* Only read the gpu busy if the hardware is already active */
   1670	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0) {
   1671		*out_sample_rate = 1;
   1672		return 0;
   1673	}
   1674
   1675	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
   1676			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
   1677	*out_sample_rate = clk_get_rate(gpu->core_clk);
   1678
   1679	pm_runtime_put(&gpu->pdev->dev);
   1680
   1681	return busy_cycles;
   1682}
   1683
   1684static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
   1685{
   1686	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1687	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
   1688
   1689	if (a5xx_gpu->has_whereami)
   1690		return a5xx_gpu->shadow[ring->id];
   1691
   1692	return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
   1693}
   1694
   1695static const struct adreno_gpu_funcs funcs = {
   1696	.base = {
   1697		.get_param = adreno_get_param,
   1698		.set_param = adreno_set_param,
   1699		.hw_init = a5xx_hw_init,
   1700		.pm_suspend = a5xx_pm_suspend,
   1701		.pm_resume = a5xx_pm_resume,
   1702		.recover = a5xx_recover,
   1703		.submit = a5xx_submit,
   1704		.active_ring = a5xx_active_ring,
   1705		.irq = a5xx_irq,
   1706		.destroy = a5xx_destroy,
   1707#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
   1708		.show = a5xx_show,
   1709#endif
   1710#if defined(CONFIG_DEBUG_FS)
   1711		.debugfs_init = a5xx_debugfs_init,
   1712#endif
   1713		.gpu_busy = a5xx_gpu_busy,
   1714		.gpu_state_get = a5xx_gpu_state_get,
   1715		.gpu_state_put = a5xx_gpu_state_put,
   1716		.create_address_space = adreno_iommu_create_address_space,
   1717		.get_rptr = a5xx_get_rptr,
   1718	},
   1719	.get_timestamp = a5xx_get_timestamp,
   1720};
   1721
   1722static void check_speed_bin(struct device *dev)
   1723{
   1724	struct nvmem_cell *cell;
   1725	u32 val;
   1726
   1727	/*
   1728	 * If the OPP table specifies a opp-supported-hw property then we have
   1729	 * to set something with dev_pm_opp_set_supported_hw() or the table
   1730	 * doesn't get populated so pick an arbitrary value that should
   1731	 * ensure the default frequencies are selected but not conflict with any
   1732	 * actual bins
   1733	 */
   1734	val = 0x80;
   1735
   1736	cell = nvmem_cell_get(dev, "speed_bin");
   1737
   1738	if (!IS_ERR(cell)) {
   1739		void *buf = nvmem_cell_read(cell, NULL);
   1740
   1741		if (!IS_ERR(buf)) {
   1742			u8 bin = *((u8 *) buf);
   1743
   1744			val = (1 << bin);
   1745			kfree(buf);
   1746		}
   1747
   1748		nvmem_cell_put(cell);
   1749	}
   1750
   1751	devm_pm_opp_set_supported_hw(dev, &val, 1);
   1752}
   1753
   1754struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
   1755{
   1756	struct msm_drm_private *priv = dev->dev_private;
   1757	struct platform_device *pdev = priv->gpu_pdev;
   1758	struct a5xx_gpu *a5xx_gpu = NULL;
   1759	struct adreno_gpu *adreno_gpu;
   1760	struct msm_gpu *gpu;
   1761	int ret;
   1762
   1763	if (!pdev) {
   1764		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
   1765		return ERR_PTR(-ENXIO);
   1766	}
   1767
   1768	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
   1769	if (!a5xx_gpu)
   1770		return ERR_PTR(-ENOMEM);
   1771
   1772	adreno_gpu = &a5xx_gpu->base;
   1773	gpu = &adreno_gpu->base;
   1774
   1775	adreno_gpu->registers = a5xx_registers;
   1776
   1777	a5xx_gpu->lm_leakage = 0x4E001A;
   1778
   1779	check_speed_bin(&pdev->dev);
   1780
   1781	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
   1782	if (ret) {
   1783		a5xx_destroy(&(a5xx_gpu->base.base));
   1784		return ERR_PTR(ret);
   1785	}
   1786
   1787	if (gpu->aspace)
   1788		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
   1789
   1790	/* Set up the preemption specific bits and pieces for each ringbuffer */
   1791	a5xx_preempt_init(gpu);
   1792
   1793	return gpu;
   1794}