cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

a6xx_gpu_state.c (34711B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
      3
#include <linux/ascii85.h>
#include <linux/overflow.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"
     10
/*
 * One captured snapshot object: 'handle' points at the static descriptor
 * (block / cluster / register list) the data was read from, 'data' holds
 * the captured dwords.
 */
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};
     15
/*
 * Top-level container for a captured a6xx GPU state.  Each *_obj array is
 * paired with an nr_* count; all backing memory is tracked on 'objs' so it
 * can be released in one pass when the state is destroyed.
 */
struct a6xx_gpu_state {
	struct msm_gpu_state base;

	/* GMU register blocks (AHB and RSCC) */
	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	/* Plain GPU register blocks */
	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	/* Shader/debug blocks read through the HLSQ aperture */
	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	/* Per-context clusters read via the CP aperture */
	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	/* Per-context clusters read from behind the AHB aperture */
	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	/* Indexed register banks (including the CP mempool) */
	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	/* GX debugbus samples */
	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	/* Single VBIF debugbus capture (targets without GBIF only) */
	struct a6xx_gpu_state_obj *vbif_debugbus;

	/* CX debugbus samples */
	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	/* Snapshots of the GMU log / HFI / debug buffers */
	struct msm_gpu_state_bo *gmu_log;
	struct msm_gpu_state_bo *gmu_hfi;
	struct msm_gpu_state_bo *gmu_debug;

	/* Recent HFI queue entries, rotated so the oldest comes first */
	s32 hfi_queue_history[2][HFI_HISTORY_SZ];

	/* List of a6xx_state_memobj allocations owned by this state */
	struct list_head objs;

	bool gpu_initialized;
};
     55
     56static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
     57{
     58	in[0] = val;
     59	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
     60
     61	return 2;
     62}
     63
     64static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
     65{
     66	in[0] = target;
     67	in[1] = (((u64) reg) << 44 | dwords);
     68
     69	return 2;
     70}
     71
     72static inline int CRASHDUMP_FINI(u64 *in)
     73{
     74	in[0] = 0;
     75	in[1] = 0;
     76
     77	return 2;
     78}
     79
/*
 * Scratch buffer used by the CP crashdumper: 'ptr' is the kernel mapping,
 * 'bo' the backing GEM object, 'iova' the GPU address the dump engine
 * fetches the script from and writes data back to.
 */
struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};
     85
/*
 * Tracking node for every allocation attached to a GPU state; the payload
 * follows the node in data[] so a single kfree releases both.
 */
struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};
     90
     91static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
     92{
     93	struct a6xx_state_memobj *obj =
     94		kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
     95
     96	if (!obj)
     97		return NULL;
     98
     99	list_add_tail(&obj->node, &a6xx_state->objs);
    100	return &obj->data;
    101}
    102
    103static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
    104		size_t size)
    105{
    106	void *dst = state_kcalloc(a6xx_state, 1, size);
    107
    108	if (dst)
    109		memcpy(dst, src, size);
    110	return dst;
    111}
    112
    113/*
    114 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
    115 * the rest for the data
    116 */
    117#define A6XX_CD_DATA_OFFSET 8192
    118#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
    119
    120static int a6xx_crashdumper_init(struct msm_gpu *gpu,
    121		struct a6xx_crashdumper *dumper)
    122{
    123	dumper->ptr = msm_gem_kernel_new(gpu->dev,
    124		SZ_1M, MSM_BO_WC, gpu->aspace,
    125		&dumper->bo, &dumper->iova);
    126
    127	if (!IS_ERR(dumper->ptr))
    128		msm_gem_object_set_name(dumper->bo, "crashdump");
    129
    130	return PTR_ERR_OR_ZERO(dumper->ptr);
    131}
    132
/*
 * Kick off the previously-built crashdumper script and wait for the
 * hardware to signal completion.  Returns 0 on success, -EINVAL if the
 * dumper is unusable or SPTPRAC power is off, or the poll-timeout error.
 */
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	/* The dump engine cannot run without SPTPRAC power */
	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	/* Point the hardware at the script in the scratch BO */
	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	/* Start the dump */
	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	/* Poll for the done bit (bit 1) in the status register */
	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	/* Always stop the dumper, even on timeout */
	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}
    162
    163/* read a value from the GX debug bus */
/*
 * Read a value from the GX debug bus.  Selects 'offset' within 'block' and
 * samples the two trace buffer registers into data[0]/data[1].  Returns the
 * number of dwords written (always 2) so callers can advance their cursor.
 */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	/* Program the same block/index selection into all four sel registers */
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}
    183
/*
 * The CX debug bus registers are accessed through a raw temporary ioremap;
 * register offsets are in dwords, hence the << 2 to get a byte offset.
 */
#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))
    189
    190/* read a value from the CX debug bus */
/*
 * Read a value from the CX debug bus through the ioremapped 'cxdbg' region.
 * Mirrors debugbus_read(): selects 'offset' within 'block', samples the two
 * trace buffers into data[0]/data[1] and returns the dword count (2).
 */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	/* Program the same block/index selection into all four sel registers */
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}
    210
    211/* Read a chunk of data from the VBIF debug bus */
    212static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
    213		u32 reg, int count, u32 *data)
    214{
    215	int i;
    216
    217	gpu_write(gpu, ctrl0, reg);
    218
    219	for (i = 0; i < count; i++) {
    220		gpu_write(gpu, ctrl1, i);
    221		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
    222	}
    223
    224	return count;
    225}
    226
/* Number of VBIF debugbus sub-blocks of each type */
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

/* Total dwords produced by one full VBIF debugbus capture */
#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))
    235
/*
 * Capture the entire VBIF debugbus (VBIF_DEBUGBUS_BLOCK_SIZE dwords) into
 * 'obj'.  The VBIF test bus clock is forced on for the duration and the
 * previous clock setting is restored before returning.
 */
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	/* No static descriptor for the VBIF bus - mark as anonymous */
	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}
    289
    290static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
    291		struct a6xx_gpu_state *a6xx_state,
    292		const struct a6xx_debugbus_block *block,
    293		struct a6xx_gpu_state_obj *obj)
    294{
    295	int i;
    296	u32 *ptr;
    297
    298	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
    299	if (!obj->data)
    300		return;
    301
    302	obj->handle = block;
    303
    304	for (ptr = obj->data, i = 0; i < block->count; i++)
    305		ptr += debugbus_read(gpu, block->id, i, ptr);
    306}
    307
    308static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
    309		struct a6xx_gpu_state *a6xx_state,
    310		const struct a6xx_debugbus_block *block,
    311		struct a6xx_gpu_state_obj *obj)
    312{
    313	int i;
    314	u32 *ptr;
    315
    316	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
    317	if (!obj->data)
    318		return;
    319
    320	obj->handle = block;
    321
    322	for (ptr = obj->data, i = 0; i < block->count; i++)
    323		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
    324}
    325
/*
 * Capture all debugbus state: program the GX (and, if the "cx_dbgc"
 * resource exists, CX) debug bus control registers, then sample every
 * known block.  On GBIF targets an extra GBIF block is captured; on
 * VBIF targets the dedicated VBIF test bus is dumped instead.
 */
static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;
	int nr_debugbus_blocks;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		/* Mirror the GX programming on the CX side */
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	/* Reserve one extra slot for the GBIF block where applicable */
	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * GBIF has same debugbus as of other GPU blocks, fall back to
		 * default path if GPU uses GBIF, also GBIF uses exactly same
		 * ID as of VBIF.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}
	}

	/*  Dump the VBIF debugbus on applicable targets */
	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
			sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		/* Drop the temporary mapping of the CX register region */
		iounmap(cxdbg);
	}
}
    450
/* Number of registers in the inclusive pair (reg[a], reg[a+1]) */
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
    452
    453/* Read a data cluster from behind the AHB aperture */
/*
 * Read a data cluster from behind the AHB aperture.  Builds a crashdumper
 * script that, for each HW context, programs the cluster's statetype into
 * HLSQ_DBG_READ_SEL and copies every register range through the HLSQ read
 * aperture into the scratch buffer, then runs it and duplicates the result
 * into the state object.
 */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		/* Select this cluster's statetype for context i */
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			/* Rebase the register offset into the read aperture */
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			/* Registers per context are identical; count them once */
			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}
    499
    500static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
    501		struct a6xx_gpu_state *a6xx_state,
    502		struct a6xx_crashdumper *dumper)
    503{
    504	int i;
    505
    506	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
    507		ARRAY_SIZE(a6xx_dbgahb_clusters),
    508		sizeof(*a6xx_state->dbgahb_clusters));
    509
    510	if (!a6xx_state->dbgahb_clusters)
    511		return;
    512
    513	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
    514
    515	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
    516		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
    517			&a6xx_dbgahb_clusters[i],
    518			&a6xx_state->dbgahb_clusters[i], dumper);
    519}
    520
    521/* Read a data cluster from the CP aperture with the crashdumper */
/*
 * Read a data cluster from the CP aperture with the crashdumper.  Builds a
 * script that selects each HW context through CP_APERTURE_CNTL_CD, copies
 * every register range into the scratch buffer, then runs it and duplicates
 * the result into the state object.
 */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		/* Select cluster id and context i in the CP aperture */
		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(cluster->id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			/* Registers per context are identical; count them once */
			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}
    570
    571static void a6xx_get_clusters(struct msm_gpu *gpu,
    572		struct a6xx_gpu_state *a6xx_state,
    573		struct a6xx_crashdumper *dumper)
    574{
    575	int i;
    576
    577	a6xx_state->clusters = state_kcalloc(a6xx_state,
    578		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
    579
    580	if (!a6xx_state->clusters)
    581		return;
    582
    583	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
    584
    585	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
    586		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
    587			&a6xx_state->clusters[i], dumper);
    588}
    589
    590/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
    591static void a6xx_get_shader_block(struct msm_gpu *gpu,
    592		struct a6xx_gpu_state *a6xx_state,
    593		const struct a6xx_shader_block *block,
    594		struct a6xx_gpu_state_obj *obj,
    595		struct a6xx_crashdumper *dumper)
    596{
    597	u64 *in = dumper->ptr;
    598	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
    599	int i;
    600
    601	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
    602		return;
    603
    604	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
    605		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
    606			(block->type << 8) | i);
    607
    608		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
    609			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
    610	}
    611
    612	CRASHDUMP_FINI(in);
    613
    614	if (a6xx_crashdumper_run(gpu, dumper))
    615		return;
    616
    617	obj->handle = block;
    618	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
    619		datasize);
    620}
    621
    622static void a6xx_get_shaders(struct msm_gpu *gpu,
    623		struct a6xx_gpu_state *a6xx_state,
    624		struct a6xx_crashdumper *dumper)
    625{
    626	int i;
    627
    628	a6xx_state->shaders = state_kcalloc(a6xx_state,
    629		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
    630
    631	if (!a6xx_state->shaders)
    632		return;
    633
    634	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
    635
    636	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
    637		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
    638			&a6xx_state->shaders[i], dumper);
    639}
    640
    641/* Read registers from behind the HLSQ aperture with the crashdumper */
/*
 * Read registers from behind the HLSQ aperture with the crashdumper.
 * regs->val1 is the HLSQ_DBG_READ_SEL selector and regs->val0 the base used
 * to rebase register offsets into the read aperture.
 */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		/* Rebase the register offset into the read aperture */
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}
    678
    679/* Read a block of registers using the crashdumper */
/*
 * Read a block of registers using the crashdumper.  If the list declares a
 * selector (regs->val0), it is programmed with regs->val1 before the reads.
 */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)

{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}
    716
    717/* Read a block of registers via AHB */
    718static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
    719		struct a6xx_gpu_state *a6xx_state,
    720		const struct a6xx_registers *regs,
    721		struct a6xx_gpu_state_obj *obj)
    722{
    723	int i, regcount = 0, index = 0;
    724
    725	for (i = 0; i < regs->count; i += 2)
    726		regcount += RANGE(regs->registers, i);
    727
    728	obj->handle = (const void *) regs;
    729	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
    730	if (!obj->data)
    731		return;
    732
    733	for (i = 0; i < regs->count; i += 2) {
    734		u32 count = RANGE(regs->registers, i);
    735		int j;
    736
    737		for (j = 0; j < count; j++)
    738			obj->data[index++] = gpu_read(gpu,
    739				regs->registers[i] + j);
    740	}
    741}
    742
    743/* Read a block of GMU registers */
    744static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
    745		struct a6xx_gpu_state *a6xx_state,
    746		const struct a6xx_registers *regs,
    747		struct a6xx_gpu_state_obj *obj,
    748		bool rscc)
    749{
    750	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    751	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    752	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    753	int i, regcount = 0, index = 0;
    754
    755	for (i = 0; i < regs->count; i += 2)
    756		regcount += RANGE(regs->registers, i);
    757
    758	obj->handle = (const void *) regs;
    759	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
    760	if (!obj->data)
    761		return;
    762
    763	for (i = 0; i < regs->count; i += 2) {
    764		u32 count = RANGE(regs->registers, i);
    765		int j;
    766
    767		for (j = 0; j < count; j++) {
    768			u32 offset = regs->registers[i] + j;
    769			u32 val;
    770
    771			if (rscc)
    772				val = gmu_read_rscc(gmu, offset);
    773			else
    774				val = gmu_read(gmu, offset);
    775
    776			obj->data[index++] = val;
    777		}
    778	}
    779}
    780
/*
 * Capture the three GMU register lists.  The first two (CX GMU and RSCC)
 * are always readable; the third requires GX power, so it is skipped when
 * GX is off and is only read after forcing the AHB fence to ALLOW mode.
 */
static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		3, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 3;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0], false);
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1], true);

	/* The third list needs GX power; leave its slot zeroed if GX is off */
	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
		&a6xx_state->gmu_registers[2], false);
}
    810
    811static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
    812		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
    813{
    814	struct msm_gpu_state_bo *snapshot;
    815
    816	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
    817	if (!snapshot)
    818		return NULL;
    819
    820	snapshot->iova = bo->iova;
    821	snapshot->size = bo->size;
    822	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
    823	if (!snapshot->data)
    824		return NULL;
    825
    826	memcpy(snapshot->data, bo->virt, bo->size);
    827
    828	return snapshot;
    829}
    830
    831static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
    832					  struct a6xx_gpu_state *a6xx_state)
    833{
    834	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    835	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    836	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    837	unsigned i, j;
    838
    839	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
    840
    841	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
    842		struct a6xx_hfi_queue *queue = &gmu->queues[i];
    843		for (j = 0; j < HFI_HISTORY_SZ; j++) {
    844			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
    845			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
    846		}
    847	}
    848}
    849
#define A6XX_GBIF_REGLIST_SIZE   1
/*
 * Capture all GPU register lists.  AHB and GBIF/VBIF lists are read via
 * the CPU; the main and HLSQ lists go through the crashdumper when one is
 * available.  With no dumper (SMMU stalled), only the main lists are read
 * via the CPU and the HLSQ lists are skipped - note nr_registers still
 * counts the unused HLSQ slots, which stay zeroed.
 */
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
	int index = 0;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_ahb_reglist[i],
			&a6xx_state->registers[index++]);

	/* Only one of the GBIF/VBIF lists applies to any given target */
	if (a6xx_has_gbif(adreno_gpu))
		a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_gbif_reglist,
				&a6xx_state->registers[index++]);
	else
		a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_vbif_reglist,
				&a6xx_state->registers[index++]);
	if (!dumper) {
		/*
		 * We can't use the crashdumper when the SMMU is stalled,
		 * because the GPU has no memory access until we resume
		 * translation (but we don't want to do that until after
		 * we have captured as much useful GPU state as possible).
		 * So instead collect registers via the CPU:
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
			a6xx_get_ahb_gpu_registers(gpu,
				a6xx_state, &a6xx_reglist[i],
				&a6xx_state->registers[index++]);
		return;
	}

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}
    909
    910/* Read a block of data from an indexed register pair */
    911static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
    912		struct a6xx_gpu_state *a6xx_state,
    913		const struct a6xx_indexed_registers *indexed,
    914		struct a6xx_gpu_state_obj *obj)
    915{
    916	int i;
    917
    918	obj->handle = (const void *) indexed;
    919	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
    920	if (!obj->data)
    921		return;
    922
    923	/* All the indexed banks start at address 0 */
    924	gpu_write(gpu, indexed->addr, 0);
    925
    926	/* Read the data - each read increments the internal address by 1 */
    927	for (i = 0; i < indexed->count; i++)
    928		obj->data[i] = gpu_read(gpu, indexed->data);
    929}
    930
    931static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
    932		struct a6xx_gpu_state *a6xx_state)
    933{
    934	u32 mempool_size;
    935	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
    936	int i;
    937
    938	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
    939		sizeof(*a6xx_state->indexed_regs));
    940	if (!a6xx_state->indexed_regs)
    941		return;
    942
    943	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
    944		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
    945			&a6xx_state->indexed_regs[i]);
    946
    947	/* Set the CP mempool size to 0 to stabilize it while dumping */
    948	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
    949	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
    950
    951	/* Get the contents of the CP mempool */
    952	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
    953		&a6xx_state->indexed_regs[i]);
    954
    955	/*
    956	 * Offset 0x2000 in the mempool is the size - copy the saved size over
    957	 * so the data is consistent
    958	 */
    959	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
    960
    961	/* Restore the size in the hardware */
    962	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
    963
    964	a6xx_state->nr_indexed_regs = count;
    965}
    966
/*
 * Capture a full crash-state snapshot of the GPU.  Always captures the GMU
 * side; the GX-side registers, shaders and clusters are only captured when
 * the GX rail is powered, and the crashdumper is only used when it is safe
 * for the GPU to access memory (no SMMU stall, hardware initialized).
 * Returns the embedded msm_gpu_state (callers release it with
 * a6xx_gpu_state_put()) or ERR_PTR(-ENOMEM).
 */
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);
	/* Capture the SMMU stall status before touching anything else */
	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	/* GMU-side state: registers, log/HFI/debug buffers, queue history */
	a6xx_get_gmu_registers(gpu, a6xx_state);

	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

	/* If GX isn't on the rest of the data isn't going to be accessible */
	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	a6xx_get_indexed_registers(gpu, a6xx_state);

	/*
	 * Try to initialize the crashdumper, if we are not dumping state
	 * with the SMMU stalled.  The crashdumper needs memory access to
	 * write out GPU state, so we need to skip this when the SMMU is
	 * stalled in response to an iova fault
	 */
	if (!stalled && !gpu->needs_hw_init &&
	    !a6xx_crashdumper_init(gpu, &_dumper)) {
		dumper = &_dumper;
	}

	/* Registers can fall back to AHB reads when no dumper is available */
	a6xx_get_registers(gpu, a6xx_state, dumper);

	/* These blocks can only be read via the crashdumper */
	if (dumper) {
		a6xx_get_shaders(gpu, a6xx_state, dumper);
		a6xx_get_clusters(gpu, a6xx_state, dumper);
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

		msm_gem_kernel_put(dumper->bo, gpu->aspace);
	}

	if (snapshot_debugbus)
		a6xx_get_debugbus(gpu, a6xx_state);

	a6xx_state->gpu_initialized = !gpu->needs_hw_init;

	return  &a6xx_state->base;
}
   1028
   1029static void a6xx_gpu_state_destroy(struct kref *kref)
   1030{
   1031	struct a6xx_state_memobj *obj, *tmp;
   1032	struct msm_gpu_state *state = container_of(kref,
   1033			struct msm_gpu_state, ref);
   1034	struct a6xx_gpu_state *a6xx_state = container_of(state,
   1035			struct a6xx_gpu_state, base);
   1036
   1037	if (a6xx_state->gmu_log)
   1038		kvfree(a6xx_state->gmu_log->data);
   1039
   1040	if (a6xx_state->gmu_hfi)
   1041		kvfree(a6xx_state->gmu_hfi->data);
   1042
   1043	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
   1044		kfree(obj);
   1045
   1046	adreno_gpu_state_destroy(state);
   1047	kfree(a6xx_state);
   1048}
   1049
   1050int a6xx_gpu_state_put(struct msm_gpu_state *state)
   1051{
   1052	if (IS_ERR_OR_NULL(state))
   1053		return 1;
   1054
   1055	return kref_put(&state->ref, a6xx_gpu_state_destroy);
   1056}
   1057
   1058static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
   1059		struct drm_printer *p)
   1060{
   1061	int i, index = 0;
   1062
   1063	if (!data)
   1064		return;
   1065
   1066	for (i = 0; i < count; i += 2) {
   1067		u32 count = RANGE(registers, i);
   1068		u32 offset = registers[i];
   1069		int j;
   1070
   1071		for (j = 0; j < count; index++, offset++, j++) {
   1072			if (data[index] == 0xdeafbead)
   1073				continue;
   1074
   1075			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
   1076				offset << 2, data[index]);
   1077		}
   1078	}
   1079}
   1080
   1081static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
   1082{
   1083	char out[ASCII85_BUFSZ];
   1084	long i, l, datalen = 0;
   1085
   1086	for (i = 0; i < len >> 2; i++) {
   1087		if (data[i])
   1088			datalen = (i + 1) << 2;
   1089	}
   1090
   1091	if (datalen == 0)
   1092		return;
   1093
   1094	drm_puts(p, "    data: !!ascii85 |\n");
   1095	drm_puts(p, "      ");
   1096
   1097
   1098	l = ascii85_encode_len(datalen);
   1099
   1100	for (i = 0; i < l; i++)
   1101		drm_puts(p, ascii85_encode(data[i], out));
   1102
   1103	drm_puts(p, "\n");
   1104}
   1105
   1106static void print_name(struct drm_printer *p, const char *fmt, const char *name)
   1107{
   1108	drm_puts(p, fmt);
   1109	drm_puts(p, name);
   1110	drm_puts(p, "\n");
   1111}
   1112
   1113static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
   1114		struct drm_printer *p)
   1115{
   1116	const struct a6xx_shader_block *block = obj->handle;
   1117	int i;
   1118
   1119	if (!obj->handle)
   1120		return;
   1121
   1122	print_name(p, "  - type: ", block->name);
   1123
   1124	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
   1125		drm_printf(p, "    - bank: %d\n", i);
   1126		drm_printf(p, "      size: %d\n", block->size);
   1127
   1128		if (!obj->data)
   1129			continue;
   1130
   1131		print_ascii85(p, block->size << 2,
   1132			obj->data + (block->size * i));
   1133	}
   1134}
   1135
   1136static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
   1137		struct drm_printer *p)
   1138{
   1139	int ctx, index = 0;
   1140
   1141	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
   1142		int j;
   1143
   1144		drm_printf(p, "    - context: %d\n", ctx);
   1145
   1146		for (j = 0; j < size; j += 2) {
   1147			u32 count = RANGE(registers, j);
   1148			u32 offset = registers[j];
   1149			int k;
   1150
   1151			for (k = 0; k < count; index++, offset++, k++) {
   1152				if (data[index] == 0xdeafbead)
   1153					continue;
   1154
   1155				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
   1156					offset << 2, data[index]);
   1157			}
   1158		}
   1159	}
   1160}
   1161
   1162static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
   1163		struct drm_printer *p)
   1164{
   1165	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
   1166
   1167	if (dbgahb) {
   1168		print_name(p, "  - cluster-name: ", dbgahb->name);
   1169		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
   1170			obj->data, p);
   1171	}
   1172}
   1173
   1174static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
   1175		struct drm_printer *p)
   1176{
   1177	const struct a6xx_cluster *cluster = obj->handle;
   1178
   1179	if (cluster) {
   1180		print_name(p, "  - cluster-name: ", cluster->name);
   1181		a6xx_show_cluster_data(cluster->registers, cluster->count,
   1182			obj->data, p);
   1183	}
   1184}
   1185
   1186static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
   1187		struct drm_printer *p)
   1188{
   1189	const struct a6xx_indexed_registers *indexed = obj->handle;
   1190
   1191	if (!indexed)
   1192		return;
   1193
   1194	print_name(p, "  - regs-name: ", indexed->name);
   1195	drm_printf(p, "    dwords: %d\n", indexed->count);
   1196
   1197	print_ascii85(p, indexed->count << 2, obj->data);
   1198}
   1199
   1200static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
   1201		u32 *data, struct drm_printer *p)
   1202{
   1203	if (block) {
   1204		print_name(p, "  - debugbus-block: ", block->name);
   1205
   1206		/*
   1207		 * count for regular debugbus data is in quadwords,
   1208		 * but print the size in dwords for consistency
   1209		 */
   1210		drm_printf(p, "    count: %d\n", block->count << 1);
   1211
   1212		print_ascii85(p, block->count << 3, data);
   1213	}
   1214}
   1215
   1216static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
   1217		struct drm_printer *p)
   1218{
   1219	int i;
   1220
   1221	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
   1222		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
   1223
   1224		a6xx_show_debugbus_block(obj->handle, obj->data, p);
   1225	}
   1226
   1227	if (a6xx_state->vbif_debugbus) {
   1228		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
   1229
   1230		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
   1231		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
   1232
   1233		/* vbif debugbus data is in dwords.  Confusing, huh? */
   1234		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
   1235	}
   1236
   1237	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
   1238		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
   1239
   1240		a6xx_show_debugbus_block(obj->handle, obj->data, p);
   1241	}
   1242}
   1243
/*
 * Print the whole captured GPU state as YAML-ish text to 'p'
 * (devcoredump / debugfs consumer).  Safe to call with a NULL or
 * ERR_PTR state; prints nothing in that case.
 */
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	/* container_of is pure pointer arithmetic, so computing it before
	 * the IS_ERR_OR_NULL check below is safe - nothing is dereferenced.
	 */
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

	/* Generic adreno state first (ringbuffers, BOs, common registers) */
	adreno_show(gpu, state, p);

	/* GMU buffer snapshots; each section header is printed even when
	 * the corresponding buffer was not captured.
	 */
	drm_puts(p, "gmu-log:\n");
	if (a6xx_state->gmu_log) {
		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
		drm_printf(p, "    size: %zu\n", gmu_log->size);
		adreno_show_object(p, &gmu_log->data, gmu_log->size,
				&gmu_log->encoded);
	}

	drm_puts(p, "gmu-hfi:\n");
	if (a6xx_state->gmu_hfi) {
		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
		unsigned i, j;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
		/* Dump the per-queue HFI read/write history alongside the
		 * raw buffer contents.
		 */
		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
			drm_printf(p, "    queue-history[%u]:", i);
			for (j = 0; j < HFI_HISTORY_SZ; j++) {
				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
			}
			drm_printf(p, "\n");
		}
		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
				&gmu_hfi->encoded);
	}

	drm_puts(p, "gmu-debug:\n");
	if (a6xx_state->gmu_debug) {
		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
		drm_printf(p, "    size: %zu\n", gmu_debug->size);
		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
				&gmu_debug->encoded);
	}

	/* GX-side register banks (empty arrays when GX was off at capture) */
	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++)
		a6xx_show_shader(&a6xx_state->shaders[i], p);

	/* dbgahb clusters are printed under the same "clusters:" heading */
	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++)
		a6xx_show_cluster(&a6xx_state->clusters[i], p);

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}