cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

a4xx_gpu.c (23040B)


// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

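/*
 * Interrupt sources unmasked at init: AHB/ATB bus errors, CP packet and
 * protected-register faults, and CACHE_FLUSH_TS, which a4xx_submit() uses
 * to signal fence completion (see the IRQ handler below).
 */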
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

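/*
 * One-time CP microengine initialization. The 17-dword CP_ME_INIT payload
 * is defined by the PM4 firmware; these magic values presumably follow the
 * downstream driver.
 */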
static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries; userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

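	/*
	 * Each CP_PROTECT entry encodes a register range that cmdstream
	 * accesses may not touch; a violation raises CP_REG_PROTECT_FAULT,
	 * decoded in a4xx_irq() below.
	 */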
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

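	/*
	 * The CP has two microcontrollers: the PM4 microengine (ME) and the
	 * prefetch parser (PFP). Firmware is streamed in a word at a time;
	 * word 0 holds the version, hence the copies start at index 1.
	 */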
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

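	/*
	 * Toggle an RBBM soft reset, reading the register back so the assert
	 * has posted before the reset is released again.
	 */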
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

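	/* ack the handled sources; the retire pass picks up completed fences */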
	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

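/*
 * Register ranges captured for hang dumps and crash state: inclusive
 * (start, end) pairs, terminated by the ~0 sentinel.
 */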
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
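		/* poll until the SP/TP power rails report on */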
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}

	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}

	return 0;
}

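/*
 * Timestamps come from the CP ALWAYS_COUNT cycle counter selected in
 * a4xx_hw_init().
 */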
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;

	/* if needed, allocate gmem: */
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}