cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

a6xx_gpu.c (59820B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
      3
      4
      5#include "msm_gem.h"
      6#include "msm_mmu.h"
      7#include "msm_gpu_trace.h"
      8#include "a6xx_gpu.h"
      9#include "a6xx_gmu.xml.h"
     10
     11#include <linux/bitfield.h>
     12#include <linux/devfreq.h>
     13#include <linux/soc/qcom/llcc-qcom.h>
     14
     15#define GPU_PAS_ID 13
     16
     17static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
     18{
     19	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     20	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
     21
     22	/* Check that the GMU is idle */
     23	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
     24		return false;
     25
      26	/* Check that the CX master is idle */
     27	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
     28			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
     29		return false;
     30
     31	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
     32		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
     33}
     34
     35static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
     36{
     37	/* wait for CP to drain ringbuffer: */
     38	if (!adreno_idle(gpu, ring))
     39		return false;
     40
     41	if (spin_until(_a6xx_check_idle(gpu))) {
     42		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
     43			gpu->name, __builtin_return_address(0),
     44			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
     45			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
     46			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
     47			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
     48		return false;
     49	}
     50
     51	return true;
     52}
     53
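/*
 * Ask the CP to publish its current read pointer into the shadow buffer,
 * so the kernel can track ring consumption without a register read.
 */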
     54static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
     55{
     56	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
     57	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
     58
     59	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
     60	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
     61		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
     62		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
     63		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
     64	}
     65}
     66
     67static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
     68{
     69	uint32_t wptr;
     70	unsigned long flags;
     71
     72	update_shadow_rptr(gpu, ring);
     73
     74	spin_lock_irqsave(&ring->preempt_lock, flags);
     75
     76	/* Copy the shadow to the actual register */
     77	ring->cur = ring->next;
     78
     79	/* Make sure to wrap wptr if we need to */
     80	wptr = get_wptr(ring);
     81
     82	spin_unlock_irqrestore(&ring->preempt_lock, flags);
     83
     84	/* Make sure everything is posted before making a decision */
     85	mb();
     86
     87	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
     88}
     89
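/*
 * Snapshot a 64-bit counter register pair into ring memory at 'iova'
 * via a CP_REG_TO_MEM packet (CNT=2, 64-bit destination).
 */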
     90static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
     91		u64 iova)
     92{
     93	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
     94	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
     95		CP_REG_TO_MEM_0_CNT(2) |
     96		CP_REG_TO_MEM_0_64B);
     97	OUT_RING(ring, lower_32_bits(iova));
     98	OUT_RING(ring, upper_32_bits(iova));
     99}
    100
    101static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
    102		struct msm_ringbuffer *ring, struct msm_file_private *ctx)
    103{
    104	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
    105	phys_addr_t ttbr;
    106	u32 asid;
    107	u64 memptr = rbmemptr(ring, ttbr0);
    108
    109	if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
    110		return;
    111
    112	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
    113		return;
    114
    115	if (!sysprof) {
    116		/* Turn off protected mode to write to special registers */
    117		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
    118		OUT_RING(ring, 0);
    119
    120		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
    121		OUT_RING(ring, 1);
    122	}
    123
    124	/* Execute the table update */
    125	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
    126	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
    127
    128	OUT_RING(ring,
    129		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
    130		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
    131	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
    132	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
    133
    134	/*
    135	 * Write the new TTBR0 to the memstore. This is good for debugging.
    136	 */
    137	OUT_PKT7(ring, CP_MEM_WRITE, 4);
    138	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
    139	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
    140	OUT_RING(ring, lower_32_bits(ttbr));
    141	OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
    142
    143	/*
    144	 * And finally, trigger a uche flush to be sure there isn't anything
    145	 * lingering in that part of the GPU
    146	 */
    147
    148	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
    149	OUT_RING(ring, 0x31);
    150
    151	if (!sysprof) {
    152		/*
    153		 * Wait for SRAM clear after the pgtable update, so the
    154		 * two can happen in parallel:
    155		 */
    156		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
    157		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
    158		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
    159				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
    160		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
    161		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
    162		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
    163		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
    164
    165		/* Re-enable protected mode: */
    166		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
    167		OUT_RING(ring, 1);
    168	}
    169}
    170
    171static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
    172{
    173	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
    174	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    175	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    176	struct msm_ringbuffer *ring = submit->ring;
    177	unsigned int i, ibs = 0;
    178
    179	a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
    180
    181	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
    182		rbmemptr_stats(ring, index, cpcycles_start));
    183
    184	/*
    185	 * For PM4 the GMU register offsets are calculated from the base of the
    186	 * GPU registers so we need to add 0x1a800 to the register value on A630
    187	 * to get the right value from PM4.
    188	 */
    189	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
    190		rbmemptr_stats(ring, index, alwayson_start));
    191
    192	/* Invalidate CCU depth and color */
    193	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
    194	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
    195
    196	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
    197	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
    198
    199	/* Submit the commands */
    200	for (i = 0; i < submit->nr_cmds; i++) {
    201		switch (submit->cmd[i].type) {
    202		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
    203			break;
    204		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
    205			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
    206				break;
    207			fallthrough;
    208		case MSM_SUBMIT_CMD_BUF:
    209			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
    210			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
    211			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
    212			OUT_RING(ring, submit->cmd[i].size);
    213			ibs++;
    214			break;
    215		}
    216
    217		/*
    218		 * Periodically update shadow-wptr if needed, so that we
    219		 * can see partial progress of submits with large # of
    220		 * cmds.. otherwise we could needlessly stall waiting for
    221		 * ringbuffer state, simply due to looking at a shadow
    222		 * rptr value that has not been updated
    223		 */
    224		if ((ibs % 32) == 0)
    225			update_shadow_rptr(gpu, ring);
    226	}
    227
    228	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
    229		rbmemptr_stats(ring, index, cpcycles_end));
    230	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
    231		rbmemptr_stats(ring, index, alwayson_end));
    232
    233	/* Write the fence to the scratch register */
    234	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
    235	OUT_RING(ring, submit->seqno);
    236
    237	/*
    238	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
    239	 * timestamp is written to the memory and then triggers the interrupt
    240	 */
    241	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
    242	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
    243		CP_EVENT_WRITE_0_IRQ);
    244	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
    245	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
    246	OUT_RING(ring, submit->seqno);
    247
    248	trace_msm_gpu_submit_flush(submit,
    249		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
    250			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
    251
    252	a6xx_flush(gpu, ring);
    253}
    254
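/*
 * Per-GPU hardware clock gating (HWCG) register tables. The table for the
 * current GPU is referenced via adreno_gpu->info->hwcg and programmed (or
 * cleared) by a6xx_set_hwcg() below.
 */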
    255const struct adreno_reglist a630_hwcg[] = {
    256	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
    257	{REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
    258	{REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
    259	{REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
    260	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
    261	{REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
    262	{REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
    263	{REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
    264	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    265	{REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
    266	{REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
    267	{REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
    268	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
    269	{REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
    270	{REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
    271	{REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
    272	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
    273	{REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
    274	{REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
    275	{REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
    276	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    277	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
    278	{REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
    279	{REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
    280	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
    281	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
    282	{REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
    283	{REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
    284	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
    285	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
    286	{REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
    287	{REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
    288	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    289	{REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
    290	{REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
    291	{REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
    292	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    293	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
    294	{REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
    295	{REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
    296	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
    297	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
    298	{REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
    299	{REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
    300	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
    301	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
    302	{REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
    303	{REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
    304	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    305	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
    306	{REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
    307	{REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
    308	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    309	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
    310	{REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
    311	{REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
    312	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
    313	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
    314	{REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
    315	{REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
    316	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
    317	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
    318	{REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
    319	{REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
    320	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    321	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
    322	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
    323	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
    324	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
    325	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    326	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    327	{REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
    328	{REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
    329	{REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
    330	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
    331	{REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
    332	{REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
    333	{REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
    334	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
    335	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
    336	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
    337	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
    338	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
    339	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
    340	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
    341	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
    342	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
    343	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
    344	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
    345	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
    346	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    347	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
    348	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    349	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    350	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    351	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    352	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    353	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    354	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    355	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
    356	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
    357	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
    358	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
    359	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
    360	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
    361	{},
    362};
    363
    364const struct adreno_reglist a640_hwcg[] = {
    365	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
    366	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
    367	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    368	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
    369	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
    370	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    371	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
    372	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
    373	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    374	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    375	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
    376	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
    377	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    378	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    379	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
    380	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
    381	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    382	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
    383	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
    384	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
    385	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
    386	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
    387	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
    388	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
    389	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    390	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    391	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
    392	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
    393	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
    394	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    395	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
    396	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    397	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    398	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    399	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    400	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    401	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
    402	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
    403	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
    404	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
    405	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    406	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
    407	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    408	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
    409	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
    410	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
    411	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
    412	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
    413	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
    414	{},
    415};
    416
    417const struct adreno_reglist a650_hwcg[] = {
    418	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
    419	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
    420	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    421	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
    422	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
    423	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    424	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
    425	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
    426	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    427	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    428	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
    429	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
    430	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    431	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    432	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
    433	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
    434	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    435	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
    436	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
    437	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
    438	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
    439	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
    440	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
    441	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
    442	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    443	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    444	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
    445	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
    446	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
    447	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    448	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
    449	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    450	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    451	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    452	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    453	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    454	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
    455	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
    456	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
    457	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
    458	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    459	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
    460	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    461	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
    462	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
    463	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
    464	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
    465	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
    466	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
    467	{},
    468};
    469
    470const struct adreno_reglist a660_hwcg[] = {
    471	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
    472	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
    473	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
    474	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
    475	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
    476	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
    477	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
    478	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
    479	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
    480	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
    481	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
    482	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
    483	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
    484	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
    485	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
    486	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
    487	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
    488	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
    489	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
    490	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
    491	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
    492	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
    493	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
    494	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
    495	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
    496	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
    497	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
    498	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
    499	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
    500	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
    501	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
    502	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
    503	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
    504	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
    505	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
    506	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
    507	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
    508	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
    509	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
    510	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
    511	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
    512	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
    513	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
    514	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
    515	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
    516	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
    517	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
    518	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
    519	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
    520	{},
    521};
    522
    523static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
    524{
    525	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    526	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    527	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    528	const struct adreno_reglist *reg;
    529	unsigned int i;
    530	u32 val, clock_cntl_on;
    531
    532	if (!adreno_gpu->info->hwcg)
    533		return;
    534
    535	if (adreno_is_a630(adreno_gpu))
    536		clock_cntl_on = 0x8aa8aa02;
    537	else
    538		clock_cntl_on = 0x8aa8aa82;
    539
    540	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
    541
    542	/* Don't re-program the registers if they are already correct */
    543	if ((!state && !val) || (state && (val == clock_cntl_on)))
    544		return;
    545
    546	/* Disable SP clock before programming HWCG registers */
    547	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
    548
    549	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
    550		gpu_write(gpu, reg->offset, state ? reg->value : 0);
    551
    552	/* Enable SP clock */
    553	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
    554
    555	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
    556}
    557
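/*
 * CP register protection tables: each entry encodes a register range.
 * RDONLY entries block writes from the command stream, NORDWR entries
 * block both reads and writes.
 */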
     558/* For a615, a616, a618, a619, a630, a640 and a680 */
    559static const u32 a6xx_protect[] = {
    560	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
    561	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
    562	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
    563	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
    564	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
    565	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
    566	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
    567	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
    568	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
    569	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
    570	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
    571	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
    572	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
    573	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
    574	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
    575	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
    576	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
    577	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
    578	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
    579	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
    580	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
    581	A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
    582	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
    583	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
    584	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
    585	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
    586	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
    587	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
    588	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
    589	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
    590	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
    591	A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
    592};
    593
    594/* These are for a620 and a650 */
    595static const u32 a650_protect[] = {
    596	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
    597	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
    598	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
    599	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
    600	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
    601	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
    602	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
    603	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
    604	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
    605	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
    606	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
    607	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
    608	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
    609	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
    610	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
    611	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
    612	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
    613	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
    614	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
    615	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
    616	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
    617	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
    618	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
    619	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
    620	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
    621	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
    622	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
    623	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
    624	A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
    625	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
    626	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
    627	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
    628	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
    629	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
    630	A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
    631	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
    632	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
    633	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
    634	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
    635};
    636
    637/* These are for a635 and a660 */
    638static const u32 a660_protect[] = {
    639	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
    640	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
    641	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
    642	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
    643	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
    644	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
    645	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
    646	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
    647	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
    648	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
    649	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
    650	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
    651	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
    652	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
    653	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
    654	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
    655	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
    656	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
    657	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
    658	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
    659	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
    660	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
    661	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
    662	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
    663	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
    664	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
    665	A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
    666	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
    667	A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
    668	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
    669	A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
    670	A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
    671	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
    672	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
    673	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
    674	A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
    675	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
    676	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
    677	A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
    678	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
    679	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
    680};
    681
    682static void a6xx_set_cp_protect(struct msm_gpu *gpu)
    683{
    684	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    685	const u32 *regs = a6xx_protect;
    686	unsigned i, count, count_max;
    687
    688	if (adreno_is_a650(adreno_gpu)) {
    689		regs = a650_protect;
    690		count = ARRAY_SIZE(a650_protect);
    691		count_max = 48;
    692		BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
    693	} else if (adreno_is_a660_family(adreno_gpu)) {
    694		regs = a660_protect;
    695		count = ARRAY_SIZE(a660_protect);
    696		count_max = 48;
    697		BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
    698	} else {
    699		regs = a6xx_protect;
    700		count = ARRAY_SIZE(a6xx_protect);
    701		count_max = 32;
    702		BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
    703	}
    704
    705	/*
    706	 * Enable access protection to privileged registers, fault on an access
    707	 * protect violation and select the last span to protect from the start
    708	 * address all the way to the end of the register address space
    709	 */
    710	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
    711
    712	for (i = 0; i < count - 1; i++)
    713		gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
     714	/* write the last entry to the final CP_PROTECT register so it covers an "infinite" range */
    715	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
    716}
    717
    718static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
    719{
    720	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    721	u32 lower_bit = 2;
    722	u32 amsbc = 0;
    723	u32 rgb565_predicator = 0;
    724	u32 uavflagprd_inv = 0;
    725
    726	/* a618 is using the hw default values */
    727	if (adreno_is_a618(adreno_gpu))
    728		return;
    729
    730	if (adreno_is_a640_family(adreno_gpu))
    731		amsbc = 1;
    732
    733	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
    734		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
    735		lower_bit = 3;
    736		amsbc = 1;
    737		rgb565_predicator = 1;
    738		uavflagprd_inv = 2;
    739	}
    740
    741	if (adreno_is_7c3(adreno_gpu)) {
    742		lower_bit = 1;
    743		amsbc = 1;
    744		rgb565_predicator = 1;
    745		uavflagprd_inv = 2;
    746	}
    747
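	/* Propagate the UBWC configuration to the RB, TPL1, SP and UCHE blocks */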
    748	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
    749		rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
    750	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
    751	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
    752		uavflagprd_inv << 4 | lower_bit << 1);
    753	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
    754}
    755
    756static int a6xx_cp_init(struct msm_gpu *gpu)
    757{
    758	struct msm_ringbuffer *ring = gpu->rb[0];
    759
    760	OUT_PKT7(ring, CP_ME_INIT, 8);
    761
    762	OUT_RING(ring, 0x0000002f);
    763
    764	/* Enable multiple hardware contexts */
    765	OUT_RING(ring, 0x00000003);
    766
    767	/* Enable error detection */
    768	OUT_RING(ring, 0x20000000);
    769
    770	/* Don't enable header dump */
    771	OUT_RING(ring, 0x00000000);
    772	OUT_RING(ring, 0x00000000);
    773
    774	/* No workarounds enabled */
    775	OUT_RING(ring, 0x00000000);
    776
    777	/* Pad rest of the cmds with 0's */
    778	OUT_RING(ring, 0x00000000);
    779	OUT_RING(ring, 0x00000000);
    780
    781	a6xx_flush(gpu, ring);
    782	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
    783}
    784
    785/*
    786 * Check that the microcode version is new enough to include several key
    787 * security fixes. Return true if the ucode is safe.
    788 */
    789static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
    790		struct drm_gem_object *obj)
    791{
    792	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
    793	struct msm_gpu *gpu = &adreno_gpu->base;
    794	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
    795	u32 *buf = msm_gem_get_vaddr(obj);
    796	bool ret = false;
    797
    798	if (IS_ERR(buf))
    799		return false;
    800
    801	/*
    802	 * Targets up to a640 (a618, a630 and a640) need to check for a
    803	 * microcode version that is patched to support the whereami opcode or
    804	 * one that is new enough to include it by default.
    805	 *
    806	 * a650 tier targets don't need whereami but still need to be
    807	 * equal to or newer than 0.95 for other security fixes
    808	 *
    809	 * a660 targets have all the critical security fixes from the start
    810	 */
    811	if (!strcmp(sqe_name, "a630_sqe.fw")) {
    812		/*
    813		 * If the lowest nibble is 0xa that is an indication that this
    814		 * microcode has been patched. The actual version is in dword
    815		 * [3] but we only care about the patchlevel which is the lowest
    816		 * nibble of dword [3]
    817		 *
    818		 * Otherwise check that the firmware is greater than or equal
    819		 * to 1.90 which was the first version that had this fix built
    820		 * in
    821		 */
    822		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
    823			(buf[0] & 0xfff) >= 0x190) {
    824			a6xx_gpu->has_whereami = true;
    825			ret = true;
    826			goto out;
    827		}
    828
    829		DRM_DEV_ERROR(&gpu->pdev->dev,
    830			"a630 SQE ucode is too old. Have version %x need at least %x\n",
    831			buf[0] & 0xfff, 0x190);
    832	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
    833		if ((buf[0] & 0xfff) >= 0x095) {
    834			ret = true;
    835			goto out;
    836		}
    837
    838		DRM_DEV_ERROR(&gpu->pdev->dev,
    839			"a650 SQE ucode is too old. Have version %x need at least %x\n",
    840			buf[0] & 0xfff, 0x095);
    841	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
    842		ret = true;
    843	} else {
    844		DRM_DEV_ERROR(&gpu->pdev->dev,
    845			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
    846	}
    847out:
    848	msm_gem_put_vaddr(obj);
    849	return ret;
    850}
    851
    852static int a6xx_ucode_init(struct msm_gpu *gpu)
    853{
    854	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    855	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    856
    857	if (!a6xx_gpu->sqe_bo) {
    858		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
    859			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
    860
    861		if (IS_ERR(a6xx_gpu->sqe_bo)) {
    862			int ret = PTR_ERR(a6xx_gpu->sqe_bo);
    863
    864			a6xx_gpu->sqe_bo = NULL;
    865			DRM_DEV_ERROR(&gpu->pdev->dev,
    866				"Could not allocate SQE ucode: %d\n", ret);
    867
    868			return ret;
    869		}
    870
    871		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
    872		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
    873			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
    874			drm_gem_object_put(a6xx_gpu->sqe_bo);
    875
    876			a6xx_gpu->sqe_bo = NULL;
    877			return -EPERM;
    878		}
    879	}
    880
    881	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
    882		REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
    883
    884	return 0;
    885}
    886
    887static int a6xx_zap_shader_init(struct msm_gpu *gpu)
    888{
    889	static bool loaded;
    890	int ret;
    891
    892	if (loaded)
    893		return 0;
    894
    895	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
    896
    897	loaded = !ret;
    898	return ret;
    899}
    900
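/* Interrupts handled by a6xx_irq(); anything not listed here stays masked */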
    901#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
    902	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
    903	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
    904	  A6XX_RBBM_INT_0_MASK_CP_IB2 | \
    905	  A6XX_RBBM_INT_0_MASK_CP_IB1 | \
    906	  A6XX_RBBM_INT_0_MASK_CP_RB | \
    907	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
    908	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
    909	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
    910	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
    911	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
    912
    913static int hw_init(struct msm_gpu *gpu)
    914{
    915	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    916	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    917	int ret;
    918
    919	/* Make sure the GMU keeps the GPU on while we set it up */
    920	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
    921
    922	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
    923
    924	/*
     925	 * Disable the trusted memory range - we don't actually support secure
    926	 * memory rendering at this point in time and we don't want to block off
    927	 * part of the virtual memory space.
    928	 */
    929	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
    930		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
    931	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
    932
    933	/* Turn on 64 bit addressing for all blocks */
    934	gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
    935	gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
    936	gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
    937	gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
    938	gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
    939	gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
    940	gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
    941	gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
    942	gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
    943	gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
    944	gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
    945	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
    946
    947	/* enable hardware clockgating */
    948	a6xx_set_hwcg(gpu, true);
    949
     950	/* VBIF/GBIF start */
    951	if (adreno_is_a640_family(adreno_gpu) ||
    952	    adreno_is_a650_family(adreno_gpu)) {
    953		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
    954		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
    955		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
    956		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
    958		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
    959	} else {
    960		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
    961	}
    962
    963	if (adreno_is_a630(adreno_gpu))
    964		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
    965
    966	/* Make all blocks contribute to the GPU BUSY perf counter */
    967	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
    968
    969	/* Disable L2 bypass in the UCHE */
    970	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0);
    971	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff);
    972	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000);
    973	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
    974	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
    975	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
    976
    977	if (!adreno_is_a650_family(adreno_gpu)) {
    978		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
    979		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
    980			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
    981
    982		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
    983			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
    984			0x00100000 + adreno_gpu->gmem - 1);
    985	}
    986
    987	gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
    988	gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
    989
    990	if (adreno_is_a640_family(adreno_gpu) ||
    991	    adreno_is_a650_family(adreno_gpu))
    992		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
    993	else
    994		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
    995	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
    996
    997	if (adreno_is_a660_family(adreno_gpu))
    998		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
    999
   1000	/* Setting the mem pool size */
   1001	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
   1002
    1003	/* Set the primFifo thresholds to default values,
    1004	 * and the vccCacheSkipDis=1 bit (0x200) for A640 and newer
    1005	 */
   1006	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
   1007		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
   1008	else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
   1009		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
   1012	else
   1013		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);
   1014
   1015	/* Set the AHB default slave response to "ERROR" */
   1016	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
   1017
   1018	/* Turn on performance counters */
   1019	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
   1020
   1021	/* Select CP0 to always count cycles */
   1022	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
   1023
   1024	a6xx_set_ubwc_config(gpu);
   1025
   1026	/* Enable fault detection */
   1027	gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
   1028		(1 << 30) | 0x1fffff);
   1029
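	/*
	 * This register is read back in a6xx_uche_fault_block() to identify
	 * which UCHE client caused a pagefault.
	 */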
   1030	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
   1031
   1032	/* Set weights for bicubic filtering */
   1033	if (adreno_is_a650_family(adreno_gpu)) {
   1034		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
   1035		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
   1036			0x3fe05ff4);
   1037		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
   1038			0x3fa0ebee);
   1039		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
   1040			0x3f5193ed);
   1041		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
   1042			0x3f0243f0);
   1043	}
   1044
   1045	/* Protect registers from the CP */
   1046	a6xx_set_cp_protect(gpu);
   1047
   1048	if (adreno_is_a660_family(adreno_gpu)) {
   1049		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
   1050		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
   1051	}
   1052
   1053	/* Set dualQ + disable afull for A660 GPU */
   1054	if (adreno_is_a660(adreno_gpu))
   1055		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
   1056
   1057	/* Enable expanded apriv for targets that support it */
   1058	if (gpu->hw_apriv) {
   1059		gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
   1060			(1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
   1061	}
   1062
   1063	/* Enable interrupts */
   1064	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);
   1065
   1066	ret = adreno_hw_init(gpu);
   1067	if (ret)
   1068		goto out;
   1069
   1070	ret = a6xx_ucode_init(gpu);
   1071	if (ret)
   1072		goto out;
   1073
   1074	/* Set the ringbuffer address */
   1075	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
   1076		gpu->rb[0]->iova);
   1077
   1078	/* Targets that support extended APRIV can use the RPTR shadow from
   1079	 * hardware but all the other ones need to disable the feature. Targets
   1080	 * that support the WHERE_AM_I opcode can use that instead
   1081	 */
   1082	if (adreno_gpu->base.hw_apriv)
   1083		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
   1084	else
   1085		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
   1086			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
   1087
   1088	/*
   1089	 * Expanded APRIV and targets that support WHERE_AM_I both need a
   1090	 * privileged buffer to store the RPTR shadow
   1091	 */
   1092
   1093	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) {
   1094		if (!a6xx_gpu->shadow_bo) {
   1095			a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
   1096				sizeof(u32) * gpu->nr_rings,
   1097				MSM_BO_WC | MSM_BO_MAP_PRIV,
   1098				gpu->aspace, &a6xx_gpu->shadow_bo,
   1099				&a6xx_gpu->shadow_iova);
   1100
   1101			if (IS_ERR(a6xx_gpu->shadow))
   1102				return PTR_ERR(a6xx_gpu->shadow);
   1103
   1104			msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
   1105		}
   1106
   1107		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
   1108			REG_A6XX_CP_RB_RPTR_ADDR_HI,
   1109			shadowptr(a6xx_gpu, gpu->rb[0]));
   1110	}
   1111
   1112	/* Always come up on rb 0 */
   1113	a6xx_gpu->cur_ring = gpu->rb[0];
   1114
   1115	gpu->cur_ctx_seqno = 0;
   1116
    1117	/* Enable the SQE to start the CP engine */
   1118	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
   1119
   1120	ret = a6xx_cp_init(gpu);
   1121	if (ret)
   1122		goto out;
   1123
   1124	/*
   1125	 * Try to load a zap shader into the secure world. If successful
   1126	 * we can use the CP to switch out of secure mode. If not then we
    1127	 * have no recourse but to try to switch ourselves out manually. If we
   1128	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
   1129	 * be blocked and a permissions violation will soon follow.
   1130	 */
   1131	ret = a6xx_zap_shader_init(gpu);
   1132	if (!ret) {
   1133		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
   1134		OUT_RING(gpu->rb[0], 0x00000000);
   1135
   1136		a6xx_flush(gpu, gpu->rb[0]);
   1137		if (!a6xx_idle(gpu, gpu->rb[0]))
   1138			return -EINVAL;
   1139	} else if (ret == -ENODEV) {
   1140		/*
   1141		 * This device does not use zap shader (but print a warning
   1142		 * just in case someone got their dt wrong.. hopefully they
   1143		 * have a debug UART to realize the error of their ways...
   1144		 * if you mess this up you are about to crash horribly)
   1145		 */
   1146		dev_warn_once(gpu->dev->dev,
   1147			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
   1148		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
   1149		ret = 0;
   1150	} else {
   1151		return ret;
   1152	}
   1153
   1154out:
   1155	/*
   1156	 * Tell the GMU that we are done touching the GPU and it can start power
   1157	 * management
   1158	 */
   1159	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
   1160
   1161	if (a6xx_gpu->gmu.legacy) {
   1162		/* Take the GMU out of its special boot mode */
   1163		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
   1164	}
   1165
   1166	return ret;
   1167}
   1168
   1169static int a6xx_hw_init(struct msm_gpu *gpu)
   1170{
   1171	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1172	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1173	int ret;
   1174
   1175	mutex_lock(&a6xx_gpu->gmu.lock);
   1176	ret = hw_init(gpu);
   1177	mutex_unlock(&a6xx_gpu->gmu.lock);
   1178
   1179	return ret;
   1180}
   1181
   1182static void a6xx_dump(struct msm_gpu *gpu)
   1183{
   1184	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
   1185			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
   1186	adreno_dump(gpu);
   1187}
   1188
   1189#define VBIF_RESET_ACK_TIMEOUT	100
   1190#define VBIF_RESET_ACK_MASK	0x00f0
   1191
   1192static void a6xx_recover(struct msm_gpu *gpu)
   1193{
   1194	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1195	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1196	int i;
   1197
   1198	adreno_dump_info(gpu);
   1199
   1200	for (i = 0; i < 8; i++)
   1201		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
   1202			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
   1203
   1204	if (hang_debug)
   1205		a6xx_dump(gpu);
   1206
   1207	/*
   1208	 * Turn off keep alive that might have been enabled by the hang
   1209	 * interrupt
   1210	 */
   1211	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
   1212
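	/* Power cycle the GPU and then run the full hardware init sequence again */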
   1213	gpu->funcs->pm_suspend(gpu);
   1214	gpu->funcs->pm_resume(gpu);
   1215
   1216	msm_gpu_hw_init(gpu);
   1217}
   1218
   1219static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
   1220{
   1221	static const char *uche_clients[7] = {
   1222		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
   1223	};
   1224	u32 val;
   1225
   1226	if (mid < 1 || mid > 3)
   1227		return "UNKNOWN";
   1228
   1229	/*
    1230	 * The source of the data depends on the mid ID read from FSYNR1
    1231	 * and the client ID read from the UCHE block.
   1232	 */
   1233	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
   1234
   1235	/* mid = 3 is most precise and refers to only one block per client */
   1236	if (mid == 3)
   1237		return uche_clients[val & 7];
   1238
   1239	/* For mid=2 the source is TP or VFD except when the client id is 0 */
   1240	if (mid == 2)
   1241		return ((val & 7) == 0) ? "TP" : "TP|VFD";
   1242
   1243	/* For mid=1 just return "UCHE" as a catchall for everything else */
   1244	return "UCHE";
   1245}
   1246
   1247static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
   1248{
   1249	if (id == 0)
   1250		return "CP";
   1251	else if (id == 4)
   1252		return "CCU";
   1253	else if (id == 6)
   1254		return "CDP Prefetch";
   1255
   1256	return a6xx_uche_fault_block(gpu, id);
   1257}
   1258
   1259#define ARM_SMMU_FSR_TF                 BIT(1)
   1260#define ARM_SMMU_FSR_PF			BIT(3)
   1261#define ARM_SMMU_FSR_EF			BIT(4)
   1262
   1263static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
   1264{
   1265	struct msm_gpu *gpu = arg;
   1266	struct adreno_smmu_fault_info *info = data;
   1267	const char *type = "UNKNOWN";
   1268	const char *block;
   1269	bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
   1270
   1271	/*
   1272	 * If we aren't going to be resuming later from fault_worker, then do
   1273	 * it now.
   1274	 */
   1275	if (!do_devcoredump) {
   1276		gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
   1277	}
   1278
   1279	/*
   1280	 * Print a default message if we couldn't get the data from the
   1281	 * adreno-smmu-priv
   1282	 */
   1283	if (!info) {
   1284		pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
   1285			iova, flags,
   1286			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
   1287			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
   1288			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
   1289			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
   1290
   1291		return 0;
   1292	}
   1293
   1294	if (info->fsr & ARM_SMMU_FSR_TF)
   1295		type = "TRANSLATION";
   1296	else if (info->fsr & ARM_SMMU_FSR_PF)
   1297		type = "PERMISSION";
   1298	else if (info->fsr & ARM_SMMU_FSR_EF)
   1299		type = "EXTERNAL";
   1300
   1301	block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
   1302
   1303	pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
   1304			info->ttbr0, iova,
   1305			flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
   1306			type, block,
   1307			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
   1308			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
   1309			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
   1310			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
   1311
   1312	if (do_devcoredump) {
   1313		/* Turn off the hangcheck timer to keep it from bothering us */
   1314		del_timer(&gpu->hangcheck_timer);
   1315
   1316		gpu->fault_info.ttbr0 = info->ttbr0;
   1317		gpu->fault_info.iova  = iova;
   1318		gpu->fault_info.flags = flags;
   1319		gpu->fault_info.type  = type;
   1320		gpu->fault_info.block = block;
   1321
   1322		kthread_queue_work(gpu->worker, &gpu->fault_work);
   1323	}
   1324
   1325	return 0;
   1326}
   1327
   1328static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
   1329{
   1330	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
   1331
   1332	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
   1333		u32 val;
   1334
   1335		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
   1336		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
   1337		dev_err_ratelimited(&gpu->pdev->dev,
   1338			"CP | opcode error | possible opcode=0x%8.8X\n",
   1339			val);
   1340	}
   1341
   1342	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
   1343		dev_err_ratelimited(&gpu->pdev->dev,
   1344			"CP ucode error interrupt\n");
   1345
   1346	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
   1347		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
   1348			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
   1349
   1350	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
   1351		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
   1352
   1353		dev_err_ratelimited(&gpu->pdev->dev,
   1354			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
   1355			val & (1 << 20) ? "READ" : "WRITE",
   1356			(val & 0x3ffff), val);
   1357	}
   1358
   1359	if (status & A6XX_CP_INT_CP_AHB_ERROR)
   1360		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
   1361
   1362	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
   1363		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
   1364
   1365	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
   1366		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
   1367
   1368}
   1369
   1370static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
   1371{
   1372	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1373	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1374	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
   1375
   1376	/*
   1377	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
   1378	 * but the fault handler will trigger the devcore dump, and we want
   1379	 * to otherwise resume normally rather than killing the submit, so
   1380	 * just bail.
   1381	 */
   1382	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
   1383		return;
   1384
   1385	/*
   1386	 * Force the GPU to stay on until after we finish
   1387	 * collecting information
   1388	 */
   1389	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
   1390
   1391	DRM_DEV_ERROR(&gpu->pdev->dev,
   1392		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
   1393		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
   1394		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
   1395		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
   1396		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
   1397		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
   1398		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
   1399		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
   1400		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
   1401
   1402	/* Turn off the hangcheck timer to keep it from bothering us */
   1403	del_timer(&gpu->hangcheck_timer);
   1404
   1405	kthread_queue_work(gpu->worker, &gpu->recover_work);
   1406}
   1407
   1408static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
   1409{
   1410	struct msm_drm_private *priv = gpu->dev->dev_private;
   1411	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
   1412
   1413	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
   1414
   1415	if (priv->disable_err_irq)
   1416		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
   1417
   1418	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
   1419		a6xx_fault_detect_irq(gpu);
   1420
   1421	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
   1422		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
   1423
   1424	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
   1425		a6xx_cp_hw_err_irq(gpu);
   1426
   1427	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
   1428		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
   1429
   1430	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
   1431		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
   1432
   1433	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
   1434		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
   1435
   1436	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
   1437		msm_gpu_retire(gpu);
   1438
   1439	return IRQ_HANDLED;
   1440}
   1441
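        /*
         * Accessors for the CX_MISC (LLC) register block: 'reg' is a dword
         * offset into llc_mmio, hence the << 2 conversion to bytes.
         */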
   1442static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
   1443{
   1444	return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
   1445}
   1446
   1447static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
   1448{
   1449	return msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
   1450}
   1451
   1452static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
   1453{
   1454	llcc_slice_deactivate(a6xx_gpu->llc_slice);
   1455	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
   1456}
   1457
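        /*
         * Activate the GPU and pagetable-walker LLCC slices and program
         * their slice IDs.  Targets without an MMU500 use the CX_MISC
         * SYSTEM_CACHE_CNTL registers; everything else goes through
         * GBIF_SCACHE_CNTL1.
         */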
   1458static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
   1459{
   1460	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
   1461	struct msm_gpu *gpu = &adreno_gpu->base;
   1462	u32 cntl1_regval = 0;
   1463
   1464	if (IS_ERR(a6xx_gpu->llc_mmio))
   1465		return;
   1466
   1467	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
   1468		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
   1469
   1470		gpu_scid &= 0x1f;
   1471		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
   1472			       (gpu_scid << 15) | (gpu_scid << 20);
   1473
   1474		/* On A660, the SCID programming for UCHE traffic is done in
   1475		 * A6XX_GBIF_SCACHE_CNTL0[14:10]
   1476		 */
   1477		if (adreno_is_a660_family(adreno_gpu))
   1478			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
   1479				(1 << 8), (gpu_scid << 10) | (1 << 8));
   1480	}
   1481
   1482	/*
    1483	 * For targets with an MMU500, activate the slice but don't program the
   1484	 * register.  The XBL will take care of that.
   1485	 */
   1486	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
   1487		if (!a6xx_gpu->have_mmu500) {
   1488			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
   1489
   1490			gpuhtw_scid &= 0x1f;
   1491			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
   1492		}
   1493	}
   1494
   1495	if (!cntl1_regval)
   1496		return;
   1497
   1498	/*
   1499	 * Program the slice IDs for the various GPU blocks and GPU MMU
   1500	 * pagetables
   1501	 */
   1502	if (!a6xx_gpu->have_mmu500) {
   1503		a6xx_llc_write(a6xx_gpu,
   1504			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
   1505
   1506		/*
   1507		 * Program cacheability overrides to not allocate cache
   1508		 * lines on a write miss
   1509		 */
   1510		a6xx_llc_rmw(a6xx_gpu,
   1511			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
   1512		return;
   1513	}
   1514
   1515	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
   1516}
   1517
   1518static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
   1519{
   1520	llcc_slice_putd(a6xx_gpu->llc_slice);
   1521	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
   1522}
   1523
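        /*
         * Grab the LLCC slice descriptors for GPU and GPU pagetable-walker
         * traffic and map the CX_MISC region on targets that need it.  If
         * neither slice is usable, llc_mmio is poisoned with an ERR_PTR so
         * that a6xx_llc_activate() becomes a no-op.
         */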
   1524static void a6xx_llc_slices_init(struct platform_device *pdev,
   1525		struct a6xx_gpu *a6xx_gpu)
   1526{
   1527	struct device_node *phandle;
   1528
   1529	/*
   1530	 * There is a different programming path for targets with an mmu500
   1531	 * attached, so detect if that is the case
   1532	 */
   1533	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
   1534	a6xx_gpu->have_mmu500 = (phandle &&
   1535		of_device_is_compatible(phandle, "arm,mmu-500"));
   1536	of_node_put(phandle);
   1537
   1538	if (a6xx_gpu->have_mmu500)
   1539		a6xx_gpu->llc_mmio = NULL;
   1540	else
   1541		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
   1542
   1543	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
   1544	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
   1545
   1546	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
   1547		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
   1548}
   1549
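        /*
         * Runtime PM resume: bring the GMU (and with it the GPU) back up
         * under gmu.lock, resume devfreq and re-activate the LLC slices.
         * needs_hw_init is set so the next use re-runs a6xx_hw_init().
         */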
   1550static int a6xx_pm_resume(struct msm_gpu *gpu)
   1551{
   1552	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1553	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1554	int ret;
   1555
   1556	gpu->needs_hw_init = true;
   1557
   1558	trace_msm_gpu_resume(0);
   1559
   1560	mutex_lock(&a6xx_gpu->gmu.lock);
   1561	ret = a6xx_gmu_resume(a6xx_gpu);
   1562	mutex_unlock(&a6xx_gpu->gmu.lock);
   1563	if (ret)
   1564		return ret;
   1565
   1566	msm_devfreq_resume(gpu);
   1567
   1568	a6xx_llc_activate(a6xx_gpu);
   1569
   1570	return 0;
   1571}
   1572
   1573static int a6xx_pm_suspend(struct msm_gpu *gpu)
   1574{
   1575	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1576	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1577	int i, ret;
   1578
   1579	trace_msm_gpu_suspend(0);
   1580
   1581	a6xx_llc_deactivate(a6xx_gpu);
   1582
   1583	msm_devfreq_suspend(gpu);
   1584
   1585	mutex_lock(&a6xx_gpu->gmu.lock);
   1586	ret = a6xx_gmu_stop(a6xx_gpu);
   1587	mutex_unlock(&a6xx_gpu->gmu.lock);
   1588	if (ret)
   1589		return ret;
   1590
   1591	if (a6xx_gpu->shadow_bo)
   1592		for (i = 0; i < gpu->nr_rings; i++)
   1593			a6xx_gpu->shadow[i] = 0;
   1594
   1595	gpu->suspend_count++;
   1596
   1597	return 0;
   1598}
   1599
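        /*
         * Read the 64-bit CP always-on counter.  The PERFCOUNTER out-of-band
         * vote keeps the GMU from power collapsing the GPU while the register
         * pair is read.
         */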
   1600static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
   1601{
   1602	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1603	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1604
   1605	mutex_lock(&a6xx_gpu->gmu.lock);
   1606
   1607	/* Force the GPU power on so we can read this register */
   1608	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
   1609
   1610	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
   1611			    REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
   1612
   1613	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
   1614
   1615	mutex_unlock(&a6xx_gpu->gmu.lock);
   1616
   1617	return 0;
   1618}
   1619
   1620static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
   1621{
   1622	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1623	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1624
   1625	return a6xx_gpu->cur_ring;
   1626}
   1627
   1628static void a6xx_destroy(struct msm_gpu *gpu)
   1629{
   1630	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1631	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1632
   1633	if (a6xx_gpu->sqe_bo) {
   1634		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
   1635		drm_gem_object_put(a6xx_gpu->sqe_bo);
   1636	}
   1637
   1638	if (a6xx_gpu->shadow_bo) {
   1639		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
   1640		drm_gem_object_put(a6xx_gpu->shadow_bo);
   1641	}
   1642
   1643	a6xx_llc_slices_destroy(a6xx_gpu);
   1644
   1645	a6xx_gmu_remove(a6xx_gpu);
   1646
   1647	adreno_gpu_cleanup(adreno_gpu);
   1648
   1649	kfree(a6xx_gpu);
   1650}
   1651
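        /*
         * Sample the GMU XOCLK power counter used for devfreq busy tracking.
         * The counter ticks at 19.2 MHz and is only read when the GMU is
         * already runtime active, so this never powers up the hardware.
         */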
   1652static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
   1653{
   1654	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1655	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1656	u64 busy_cycles;
   1657
   1658	/* 19.2MHz */
   1659	*out_sample_rate = 19200000;
   1660
   1661	/* Only read the gpu busy if the hardware is already active */
   1662	if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) == 0)
   1663		return 0;
   1664
   1665	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
   1666			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
   1667			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
   1668
   1669
   1670	pm_runtime_put(a6xx_gpu->gmu.dev);
   1671
   1672	return busy_cycles;
   1673}
   1674
   1675static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
   1676{
   1677	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1678	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1679
   1680	mutex_lock(&a6xx_gpu->gmu.lock);
   1681	a6xx_gmu_set_freq(gpu, opp);
   1682	mutex_unlock(&a6xx_gpu->gmu.lock);
   1683}
   1684
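        /*
         * Create the global GPU address space on top of a platform bus IOMMU
         * domain.  The start address is clamped to at least SZ_16M, and when
         * the pagetable walker has an LLCC slice the domain is flagged so
         * that its walks can use the system cache.
         */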
   1685static struct msm_gem_address_space *
   1686a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
   1687{
   1688	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1689	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1690	struct iommu_domain *iommu;
   1691	struct msm_mmu *mmu;
   1692	struct msm_gem_address_space *aspace;
   1693	u64 start, size;
   1694
   1695	iommu = iommu_domain_alloc(&platform_bus_type);
   1696	if (!iommu)
   1697		return NULL;
   1698
   1699	/*
    1700	 * This allows the GPU to set the bus attributes required to use the
    1701	 * system cache on behalf of the IOMMU page table walker.
   1702	 */
   1703	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
   1704		adreno_set_llc_attributes(iommu);
   1705
   1706	mmu = msm_iommu_new(&pdev->dev, iommu);
   1707	if (IS_ERR(mmu)) {
   1708		iommu_domain_free(iommu);
   1709		return ERR_CAST(mmu);
   1710	}
   1711
   1712	/*
   1713	 * Use the aperture start or SZ_16M, whichever is greater. This will
   1714	 * ensure that we align with the allocated pagetable range while still
   1715	 * allowing room in the lower 32 bits for GMEM and whatnot
   1716	 */
   1717	start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
   1718	size = iommu->geometry.aperture_end - start + 1;
   1719
   1720	aspace = msm_gem_address_space_create(mmu, "gpu",
   1721		start & GENMASK_ULL(48, 0), size);
   1722
   1723	if (IS_ERR(aspace) && !IS_ERR(mmu))
   1724		mmu->funcs->destroy(mmu);
   1725
   1726	return aspace;
   1727}
   1728
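        /*
         * Per-process address space: a separate pagetable on the shared GPU
         * MMU, spanning 4GB starting at 0x100000000.
         */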
   1729static struct msm_gem_address_space *
   1730a6xx_create_private_address_space(struct msm_gpu *gpu)
   1731{
   1732	struct msm_mmu *mmu;
   1733
   1734	mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
   1735
   1736	if (IS_ERR(mmu))
   1737		return ERR_CAST(mmu);
   1738
   1739	return msm_gem_address_space_create(mmu,
   1740		"gpu", 0x100000000ULL, SZ_4G);
   1741}
   1742
   1743static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
   1744{
   1745	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   1746	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
   1747
   1748	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
   1749		return a6xx_gpu->shadow[ring->id];
   1750
   1751	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
   1752}
   1753
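        /*
         * Map raw speed-bin fuse values to a bin index for the OPP table;
         * UINT_MAX means the fuse value is not recognized.
         */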
   1754static u32 a618_get_speed_bin(u32 fuse)
   1755{
   1756	if (fuse == 0)
   1757		return 0;
   1758	else if (fuse == 169)
   1759		return 1;
   1760	else if (fuse == 174)
   1761		return 2;
   1762
   1763	return UINT_MAX;
   1764}
   1765
   1766static u32 adreno_7c3_get_speed_bin(u32 fuse)
   1767{
   1768	if (fuse == 0)
   1769		return 0;
   1770	else if (fuse == 117)
   1771		return 0;
   1772	else if (fuse == 190)
   1773		return 1;
   1774
   1775	return UINT_MAX;
   1776}
   1777
   1778static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
   1779{
   1780	u32 val = UINT_MAX;
   1781
   1782	if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev))
   1783		val = a618_get_speed_bin(fuse);
   1784
   1785	if (adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), rev))
   1786		val = adreno_7c3_get_speed_bin(fuse);
   1787
   1788	if (val == UINT_MAX) {
   1789		DRM_DEV_ERROR(dev,
    1790			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
   1791			fuse);
   1792		return UINT_MAX;
   1793	}
   1794
   1795	return (1 << val);
   1796}
   1797
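        /*
         * Read the speed-bin fuse (if the platform exposes one) and restrict
         * the OPP table accordingly via the supported-hw bitmask.  A missing
         * fuse (-ENOENT) or an unrecognized value leaves all OPPs enabled.
         */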
   1798static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
   1799{
   1800	u32 supp_hw = UINT_MAX;
   1801	u32 speedbin;
   1802	int ret;
   1803
   1804	ret = adreno_read_speedbin(dev, &speedbin);
   1805	/*
    1806	 * -ENOENT means that the platform doesn't support speedbin, which is
    1807	 * fine.
   1808	 */
   1809	if (ret == -ENOENT) {
   1810		return 0;
   1811	} else if (ret) {
   1812		DRM_DEV_ERROR(dev,
    1813			      "failed to read speed-bin (%d). Some OPPs may not be supported by hardware\n",
   1814			      ret);
   1815		goto done;
   1816	}
   1817
   1818	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
   1819
   1820done:
   1821	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
   1822	if (ret)
   1823		return ret;
   1824
   1825	return 0;
   1826}
   1827
   1828static const struct adreno_gpu_funcs funcs = {
   1829	.base = {
   1830		.get_param = adreno_get_param,
   1831		.set_param = adreno_set_param,
   1832		.hw_init = a6xx_hw_init,
   1833		.pm_suspend = a6xx_pm_suspend,
   1834		.pm_resume = a6xx_pm_resume,
   1835		.recover = a6xx_recover,
   1836		.submit = a6xx_submit,
   1837		.active_ring = a6xx_active_ring,
   1838		.irq = a6xx_irq,
   1839		.destroy = a6xx_destroy,
   1840#if defined(CONFIG_DRM_MSM_GPU_STATE)
   1841		.show = a6xx_show,
   1842#endif
   1843		.gpu_busy = a6xx_gpu_busy,
   1844		.gpu_get_freq = a6xx_gmu_get_freq,
   1845		.gpu_set_freq = a6xx_gpu_set_freq,
   1846#if defined(CONFIG_DRM_MSM_GPU_STATE)
   1847		.gpu_state_get = a6xx_gpu_state_get,
   1848		.gpu_state_put = a6xx_gpu_state_put,
   1849#endif
   1850		.create_address_space = a6xx_create_address_space,
   1851		.create_private_address_space = a6xx_create_private_address_space,
   1852		.get_rptr = a6xx_get_rptr,
   1853	},
   1854	.get_timestamp = a6xx_get_timestamp,
   1855};
   1856
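        /*
         * Probe time constructor: pick hw_apriv and clamp_to_idle from the
         * detected revision, set up the LLC slices and supported-hw OPP mask,
         * then initialize the adreno core, the (mandatory) GMU and the IOMMU
         * fault handler.
         */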
   1857struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
   1858{
   1859	struct msm_drm_private *priv = dev->dev_private;
   1860	struct platform_device *pdev = priv->gpu_pdev;
   1861	struct adreno_platform_config *config = pdev->dev.platform_data;
   1862	const struct adreno_info *info;
   1863	struct device_node *node;
   1864	struct a6xx_gpu *a6xx_gpu;
   1865	struct adreno_gpu *adreno_gpu;
   1866	struct msm_gpu *gpu;
   1867	int ret;
   1868
   1869	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
   1870	if (!a6xx_gpu)
   1871		return ERR_PTR(-ENOMEM);
   1872
   1873	adreno_gpu = &a6xx_gpu->base;
   1874	gpu = &adreno_gpu->base;
   1875
   1876	adreno_gpu->registers = NULL;
   1877
   1878	/*
   1879	 * We need to know the platform type before calling into adreno_gpu_init
   1880	 * so that the hw_apriv flag can be correctly set. Snoop into the info
   1881	 * and grab the revision number
   1882	 */
   1883	info = adreno_info(config->rev);
   1884
   1885	if (info && (info->revn == 650 || info->revn == 660 ||
   1886			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
   1887		adreno_gpu->base.hw_apriv = true;
   1888
   1889	/*
   1890	 * For now only clamp to idle freq for devices where this is known not
   1891	 * to cause power supply issues:
   1892	 */
   1893	if (info && (info->revn == 618))
   1894		gpu->clamp_to_idle = true;
   1895
   1896	a6xx_llc_slices_init(pdev, a6xx_gpu);
   1897
   1898	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
   1899	if (ret) {
   1900		a6xx_destroy(&(a6xx_gpu->base.base));
   1901		return ERR_PTR(ret);
   1902	}
   1903
   1904	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
   1905	if (ret) {
   1906		a6xx_destroy(&(a6xx_gpu->base.base));
   1907		return ERR_PTR(ret);
   1908	}
   1909
   1910	/* Check if there is a GMU phandle and set it up */
   1911	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
   1912
   1913	/* FIXME: How do we gracefully handle this? */
   1914	BUG_ON(!node);
   1915
   1916	ret = a6xx_gmu_init(a6xx_gpu, node);
   1917	of_node_put(node);
   1918	if (ret) {
   1919		a6xx_destroy(&(a6xx_gpu->base.base));
   1920		return ERR_PTR(ret);
   1921	}
   1922
   1923	if (gpu->aspace)
   1924		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
   1925				a6xx_fault_handler);
   1926
   1927	return gpu;
   1928}