cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

a3xx_gpu.c (19784B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

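/*
 * Write a userspace submit into the ringbuffer: emit each command buffer
 * as a CP_INDIRECT_BUFFER_PFD packet (skipping context-restore buffers
 * when there has been no context switch), record the submit's seqno in a
 * scratch register, and emit a CACHE_FLUSH_TS event that writes the fence
 * value and raises an IRQ once the GPU has passed the timestamp.
 */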
static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

#if 0
	/* Dummy set-constant to trigger context rollover */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
	OUT_RING(ring, 0x00000000);
#endif

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

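/*
 * Issue the CP_ME_INIT packet to initialize the CP micro engine, then
 * wait for the GPU to go idle so we know the engine came up before any
 * real submits are accepted.
 */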
static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a3xx_idle(gpu);
}

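/*
 * Bring the GPU up after reset: program the VBIF for the specific a3xx
 * variant, enable error reporting, hang detection and the perf counters
 * we use, set up the CP protected register ranges, load the PM4 and PFP
 * microcode, and finally release ME_HALT and run CP_ME_INIT.
 */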
static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This is to enable AXI running
		 * higher frequency than GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

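/*
 * Recover from a GPU hang: print the CP scratch registers (and a full
 * register dump when hang_debug is set), pulse RBBM_SW_RESET_CMD to soft
 * reset the core, then hand off to the common adreno recovery path.
 */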
static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

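/*
 * Top-level IRQ handler: read the RBBM interrupt status, ack everything
 * that is pending via RBBM_INT_CLEAR_CMD, and retire completed submits.
 * Individual status bits are not decoded yet (see the TODO below); they
 * are only reported through the DBG print of the raw status.
 */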
static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

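/*
 * Register ranges captured when dumping GPU state, expressed as inclusive
 * (start, end) pairs of register offsets and terminated by a ~0 sentinel.
 * The table is handed to the adreno core via adreno_gpu->registers in
 * a3xx_gpu_init() below.
 */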
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

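/*
 * Capture GPU state for the .show/.gpu_state_get callbacks (debugfs and
 * devcoredump): grab the common adreno state and add the a3xx-specific
 * RBBM_STATUS value.
 */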
static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

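/*
 * The RPTR shadow is disabled in a3xx_hw_init() (AXXX_CP_RB_CNTL_NO_UPDATE),
 * so read the CP read pointer straight from the register and cache it in
 * the memptrs for the common ringbuffer code.
 */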
static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = a3xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a3xx_get_rptr,
	},
};

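/* Perf counters handed to the core via gpu->perfcntrs and programmed in
 * a3xx_hw_init(): SP ALU active cycles ("ALUACTIVE") and SP full ALU
 * instructions ("ALUFULL").
 */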
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

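/*
 * Probe-time constructor: allocate the a3xx_gpu wrapper, register the
 * callbacks above with the adreno/msm core, set up OCMEM for a330 and the
 * interconnect paths, and bail out when there is no IOMMU unless the VRAM
 * carveout has been explicitly allowed.
 */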
struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}