[PATCH 6/6] drm/msm/a6xx: Add a6xx gpu state

Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> · Fri, 14 Sep 2018 09:19:35 -0600

Add support for gathering and dumping the a6xx GPU state including
registers, GMU registers, indexed registers, shader blocks,
context clusters and debugbus.

Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx>
---
 drivers/gpu/drm/msm/Makefile                |    1 +
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c       |   25 +-
 drivers/gpu/drm/msm/adreno/a6xx_gmu.h       |    3 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c       |   39 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h       |    6 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 1159 +++++++++++++++++++
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h |  430 +++++++
 7 files changed, 1625 insertions(+), 38 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
 create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 19ab521d4c3a..33645c6539ee 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -14,6 +14,7 @@ msm-y := \
 	adreno/a6xx_gpu.o \
 	adreno/a6xx_gmu.o \
 	adreno/a6xx_hfi.o \
+	adreno/a6xx_gpu_state.o \
 	hdmi/hdmi.o \
 	hdmi/hdmi_audio.o \
 	hdmi/hdmi_bridge.o \
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index d04b63ea8cd9..ad2a887ce700 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -55,10 +55,31 @@ static irqreturn_t a6xx_hfi_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu)
+{
+	const u32 off_bits =
+		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_OFF |
+		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SP_CLOCK_OFF;
+	u32 status;
+
+	/* gpu state code may call this, so bail out unless the GMU is valid */
+	if (IS_ERR_OR_NULL(gmu->mmio))
+		return false;
+
+	status = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
+	return (status & off_bits) == 0;
+}
+
 /* Check to see if the GX rail is still powered */
-static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
+bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
 {
-	u32 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
+	u32 val;
+
+	/* This can be called from gpu state code so make sure GMU is valid */
+	if (IS_ERR_OR_NULL(gmu->mmio))
+		return false;
+
+	val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
 
 	return !(val &
 		(A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF |
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index 09d97e4ed293..9683e65b0783 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -153,4 +153,7 @@ void a6xx_hfi_stop(struct a6xx_gmu *gmu);
 
 void a6xx_hfi_task(unsigned long data);
 
+bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu);
+bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu);
+
 #endif
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 9a14cb3d5027..69700806ee9f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -645,33 +645,6 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
 };
 
-static const u32 a6xx_registers[] = {
-	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b,
-	0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044,
-	0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb,
-	0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d,
-	0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511,
-	0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813,
-	0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843,
-	0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4,
-	0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911,
-	0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996,
-	0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1,
-	0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06,
-	0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19,
-	0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601,
-	0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637,
-	0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c,
-	0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f,
-	0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77,
-	0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e,
-	0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23,
-	0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79,
-	0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a,
-	0xa610, 0xa617, 0xa630, 0xa630,
-	~0
-};
-
 static int a6xx_pm_resume(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -720,14 +693,6 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 	return 0;
 }
 
-#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
-		struct drm_printer *p)
-{
-	adreno_show(gpu, state, p);
-}
-#endif
-
 static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -768,6 +733,8 @@ static const struct adreno_gpu_funcs funcs = {
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 		.show = a6xx_show,
 #endif
+		.gpu_state_get = a6xx_gpu_state_get,
+		.gpu_state_put = a6xx_gpu_state_put,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
@@ -789,7 +756,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 	adreno_gpu = &a6xx_gpu->base;
 	gpu = &adreno_gpu->base;
 
-	adreno_gpu->registers = a6xx_registers;
+	adreno_gpu->registers = NULL;
 	adreno_gpu->reg_offsets = a6xx_register_offsets;
 
 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index dd69e5b0e692..1e8f8ae93894 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -57,4 +57,10 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
 int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
 void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
 
+void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+		struct drm_printer *p);
+
+struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu);
+int a6xx_gpu_state_put(struct msm_gpu_state *state);
+
 #endif /* __A6XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
new file mode 100644
index 000000000000..09f8110ca6e4
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -0,0 +1,1159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+
+#include <linux/ascii85.h>
+#include "msm_gem.h"
+#include "a6xx_gpu.h"
+#include "a6xx_gmu.h"
+#include "a6xx_gpu_state.h"
+#include "a6xx_gmu.xml.h"
+
+/*
+ * One captured block of state: @handle points at the static descriptor the
+ * block was gathered from and @data at the kernel copy of the dwords read.
+ */
+struct a6xx_gpu_state_obj {
+	const void *handle;
+	u32 *data;
+};
+
+/*
+ * a6xx snapshot: the generic msm_gpu_state plus, for each category of
+ * hardware state, an array of captured objects and its element count.
+ */
+struct a6xx_gpu_state {
+	struct msm_gpu_state base;
+
+	struct a6xx_gpu_state_obj *gmu_registers;
+	int nr_gmu_registers;
+
+	struct a6xx_gpu_state_obj *registers;
+	int nr_registers;
+
+	struct a6xx_gpu_state_obj *shaders;
+	int nr_shaders;
+
+	struct a6xx_gpu_state_obj *clusters;
+	int nr_clusters;
+
+	struct a6xx_gpu_state_obj *dbgahb_clusters;
+	int nr_dbgahb_clusters;
+
+	struct a6xx_gpu_state_obj *indexed_regs;
+	int nr_indexed_regs;
+
+	struct a6xx_gpu_state_obj *debugbus;
+	int nr_debugbus;
+
+	/* A single object rather than an array, so it has no count */
+	struct a6xx_gpu_state_obj *vbif_debugbus;
+
+	struct a6xx_gpu_state_obj *cx_debugbus;
+	int nr_cx_debugbus;
+};
+
+/*
+ * Emit a two-qword crashdumper script entry at @in that writes @val to @reg:
+ * the first qword holds the value, the second holds the register offset
+ * shifted up by 44 with bits 21 and 0 set. Returns the qwords consumed.
+ */
+static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
+{
+	const u64 control = (((u64) reg) << 44) | (1 << 21) | 1;
+
+	in[0] = val;
+	in[1] = control;
+
+	return 2;
+}
+
+/*
+ * Emit a two-qword crashdumper script entry at @in that reads @dwords
+ * registers starting at @reg into the GPU address @target. Returns the
+ * qwords consumed.
+ */
+static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
+{
+	const u64 control = (((u64) reg) << 44) | dwords;
+
+	in[0] = target;
+	in[1] = control;
+
+	return 2;
+}
+
+/*
+ * Terminate a crashdumper script with an all-zero two-qword entry at @in.
+ * Returns the qwords consumed.
+ */
+static inline int CRASHDUMP_FINI(u64 *in)
+{
+	int qword;
+
+	for (qword = 0; qword < 2; qword++)
+		in[qword] = 0;
+
+	return 2;
+}
+
+/*
+ * Scratch buffer shared with the crashdumper: @ptr is the kernel mapping,
+ * @bo the backing GEM object and @iova the address handed to the GPU.
+ */
+struct a6xx_crashdumper {
+	void *ptr;
+	struct drm_gem_object *bo;
+	u64 iova;
+};
+
+/*
+ * Allocate 1MB for the crashdumper scratch region - 8k for the script and
+ * the rest for the data
+ */
+#define A6XX_CD_DATA_OFFSET 8192
+#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
+
+/*
+ * Allocate the 1MB uncached scratch buffer for the crashdumper and record
+ * its mapping, GEM object and iova in @dumper. Returns 0 on success or the
+ * error encoded in the returned pointer.
+ */
+static int a6xx_crashdumper_init(struct msm_gpu *gpu,
+		struct a6xx_crashdumper *dumper)
+{
+	void *vaddr = msm_gem_kernel_new_locked(gpu->dev, SZ_1M,
+		MSM_BO_UNCACHED, gpu->aspace, &dumper->bo, &dumper->iova);
+
+	dumper->ptr = vaddr;
+
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	return 0;
+}
+
+/*
+ * Run the script that the caller has written at the start of the dumper
+ * buffer. Returns -EINVAL if the buffer is unusable or SPTPRAC is not
+ * powered, otherwise whatever gpu_poll_timeout() reports for the wait.
+ */
+static int a6xx_crashdumper_run(struct msm_gpu *gpu,
+		struct a6xx_crashdumper *dumper)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+	u32 val;
+	int ret;
+
+	if (IS_ERR_OR_NULL(dumper->ptr))
+		return -EINVAL;
+
+	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
+		return -EINVAL;
+
+	/* Make sure all pending memory writes are posted */
+	wmb();
+
+	/* Point the hardware at the script, which sits at the buffer's iova */
+	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
+		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+
+	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
+
+	/* Wait for bit 1 of the status register to be set */
+	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
+		val & 0x02, 100, 10000);
+
+	/* Clear the control register whether or not the poll succeeded */
+	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
+
+	return ret;
+}
+
+/*
+ * Undo a6xx_crashdumper_init(): release the iova and kernel mapping of the
+ * dumper buffer, then drop the reference on its GEM object.
+ */
+static void a6xx_crashdumper_free(struct msm_gpu *gpu,
+		struct a6xx_crashdumper *dumper)
+{
+	msm_gem_put_iova(dumper->bo, gpu->aspace);
+	msm_gem_put_vaddr(dumper->bo);
+
+	drm_gem_object_unreference(dumper->bo);
+}
+
+/*
+ * Sample one entry from the GX debug bus: program the @block / @offset
+ * selection into all four selector registers, then store TRACE_BUF2 and
+ * TRACE_BUF1 in data[0] and data[1]. Returns the dwords written to @data.
+ */
+static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
+		u32 *data)
+{
+	static const u32 sel_regs[] = {
+		REG_A6XX_DBGC_CFG_DBGBUS_SEL_A,
+		REG_A6XX_DBGC_CFG_DBGBUS_SEL_B,
+		REG_A6XX_DBGC_CFG_DBGBUS_SEL_C,
+		REG_A6XX_DBGC_CFG_DBGBUS_SEL_D,
+	};
+	u32 select = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
+		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(sel_regs); n++)
+		gpu_write(gpu, sel_regs[n], select);
+
+	/* Wait 1 us to make sure the data is flowing */
+	udelay(1);
+
+	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
+	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
+
+	return 2;
+}
+
+/*
+ * Accessors for the CX debug controller mapping: @offset is a dword register
+ * offset and is converted to a byte offset from @ptr.
+ */
+#define cxdbg_write(ptr, offset, val) \
+	msm_writel((val), (ptr) + ((offset) << 2))
+
+#define cxdbg_read(ptr, offset) \
+	msm_readl((ptr) + ((offset) << 2))
+
+/*
+ * Sample one entry from the CX debug bus through the @cxdbg mapping: program
+ * the @block / @offset selection into all four selector registers, then
+ * store TRACE_BUF2 and TRACE_BUF1 in data[0] and data[1]. Returns the number
+ * of dwords written to @data.
+ *
+ * The __iomem annotation qualifies the pointed-to memory, matching the
+ * "void __iomem *" that callers pass in.
+ */
+static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
+		u32 *data)
+{
+	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
+		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
+
+	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
+	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
+	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
+	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
+
+	/* Wait 1 us to make sure the data is flowing */
+	udelay(1);
+
+	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
+	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
+
+	return 2;
+}
+
+/*
+ * Read @count dwords from the VBIF test bus into @data: write @reg to the
+ * @ctrl0 register once, then for each index write it to @ctrl1 and sample
+ * the test bus output. Returns @count.
+ */
+static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
+		u32 reg, int count, u32 *data)
+{
+	int idx = 0;
+
+	gpu_write(gpu, ctrl0, reg);
+
+	while (idx < count) {
+		gpu_write(gpu, ctrl1, idx);
+		data[idx] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
+		idx++;
+	}
+
+	return count;
+}
+
+/* Number of blocks of each kind read from the VBIF test bus */
+#define AXI_ARB_BLOCKS 2
+#define XIN_AXI_BLOCKS 5
+#define XIN_CORE_BLOCKS 4
+
+/*
+ * Total dwords in the VBIF dump: 16 per AXI arbiter block, 18 per XIN AXI
+ * block and 12 per XIN core block, matching the per-block read counts used
+ * by a6xx_get_vbif_debugbus_block().
+ */
+#define VBIF_DEBUGBUS_BLOCK_SIZE \
+	((16 * AXI_ARB_BLOCKS) + \
+	 (18 * XIN_AXI_BLOCKS) + \
+	 (12 * XIN_CORE_BLOCKS))
+
+/*
+ * Capture the VBIF debug bus into @obj. The test bus clock is forced on for
+ * the duration of the reads and the original clock setting is restored
+ * afterwards. If the buffer cannot be allocated, @obj->data is left NULL and
+ * the hardware is not touched.
+ */
+static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
+		struct a6xx_gpu_state_obj *obj)
+{
+	u32 clk, *ptr;
+	int i;
+
+	obj->data = kcalloc(VBIF_DEBUGBUS_BLOCK_SIZE, sizeof(u32), GFP_KERNEL);
+	obj->handle = NULL;
+
+	/* Without a buffer the reads below would write through NULL */
+	if (!obj->data)
+		return;
+
+	/* Get the current clock setting */
+	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
+
+	/* Force on the bus so we can read it */
+	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
+		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
+
+	/* We will read from BUS2 first, so disable BUS1 */
+	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
+
+	/* Enable the VBIF bus for reading */
+	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
+
+	ptr = obj->data;
+
+	for (i = 0; i < AXI_ARB_BLOCKS; i++)
+		ptr += vbif_debugbus_read(gpu,
+			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
+			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
+			1 << (i + 16), 16, ptr);
+
+	for (i = 0; i < XIN_AXI_BLOCKS; i++)
+		ptr += vbif_debugbus_read(gpu,
+			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
+			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
+			1 << i, 18, ptr);
+
+	/* Stop BUS2 so we can turn on BUS1 */
+	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
+
+	for (i = 0; i < XIN_CORE_BLOCKS; i++)
+		ptr += vbif_debugbus_read(gpu,
+			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
+			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
+			1 << i, 12, ptr);
+
+	/* Restore the VBIF clock setting */
+	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
+}
+
+/*
+ * Capture every index of one GX debug bus @block into @obj. On allocation
+ * failure @obj->data is NULL and @obj->handle is left unset.
+ */
+static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
+		const struct a6xx_debugbus_block *block,
+		struct a6xx_gpu_state_obj *obj)
+{
+	u32 *cursor;
+	int idx;
+
+	/* Each sample is two dwords, hence one u64 of space per index */
+	cursor = kcalloc(block->count, sizeof(u64), GFP_KERNEL);
+	obj->data = cursor;
+	if (!cursor)
+		return;
+
+	obj->handle = block;
+
+	for (idx = 0; idx < block->count; idx++)
+		cursor += debugbus_read(gpu, block->id, idx, cursor);
+}
+
+/*
+ * Capture every index of one CX debug bus @block into @obj using the @cxdbg
+ * mapping. On allocation failure @obj->data is NULL and @obj->handle is
+ * left unset.
+ */
+static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
+		const struct a6xx_debugbus_block *block,
+		struct a6xx_gpu_state_obj *obj)
+{
+	u32 *cursor;
+	int idx;
+
+	/* Each sample is two dwords, hence one u64 of space per index */
+	cursor = kcalloc(block->count, sizeof(u64), GFP_KERNEL);
+	obj->data = cursor;
+	if (!cursor)
+		return;
+
+	obj->handle = block;
+
+	for (idx = 0; idx < block->count; idx++)
+		cursor += cx_debugbus_read(cxdbg, block->id, idx, cursor);
+}
+
+/*
+ * Configure the GX and (when the "cx_dbgc" resource can be mapped) CX debug
+ * controllers, then capture the GX debug bus blocks, the VBIF debug bus and
+ * the CX debug bus blocks into @a6xx_state. Each category is skipped if its
+ * array cannot be allocated.
+ */
+static void a6xx_get_debugbus(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state)
+{
+	struct resource *res;
+	void __iomem *cxdbg = NULL;
+
+	/* Set up the GX debug bus */
+
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
+		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
+
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
+		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
+
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
+
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+
+	/* Set up the CX debug bus - it lives elsewhere in the system so do a
+	 * temporary ioremap for the registers
+	 */
+	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
+			"cx_dbgc");
+
+	if (res)
+		cxdbg = ioremap(res->start, resource_size(res));
+
+	/*
+	 * NOTE(review): the writes below go through the cx_dbgc mapping but
+	 * use the GX REG_A6XX_DBGC_* offsets, whereas cx_debugbus_read() uses
+	 * the REG_A6XX_CX_DBGC_* offsets - confirm the GX offsets are really
+	 * intended here.
+	 */
+	if (cxdbg) {
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
+			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
+
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
+			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
+
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
+			0x76543210);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
+			0xFEDCBA98);
+
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+	}
+
+	/* GX debug bus: one object per entry of a6xx_debugbus_blocks */
+	a6xx_state->debugbus = kcalloc(ARRAY_SIZE(a6xx_debugbus_blocks),
+		sizeof(*a6xx_state->debugbus), GFP_KERNEL);
+
+	if (a6xx_state->debugbus) {
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
+			a6xx_get_debugbus_block(gpu,
+				&a6xx_debugbus_blocks[i],
+				&a6xx_state->debugbus[i]);
+
+		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
+	}
+
+	/* VBIF debug bus: a single object */
+	a6xx_state->vbif_debugbus = kzalloc(sizeof(*a6xx_state->vbif_debugbus),
+		GFP_KERNEL);
+
+	if (a6xx_state->vbif_debugbus)
+		a6xx_get_vbif_debugbus_block(gpu, a6xx_state->vbif_debugbus);
+
+	/* CX debug bus: only when the temporary mapping was obtained */
+	if (cxdbg) {
+		a6xx_state->cx_debugbus =
+			kcalloc(ARRAY_SIZE(a6xx_cx_debugbus_blocks),
+			sizeof(*a6xx_state->cx_debugbus), GFP_KERNEL);
+
+		if (a6xx_state->cx_debugbus) {
+			int i;
+
+			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
+				a6xx_get_cx_debugbus_block(cxdbg,
+					&a6xx_cx_debugbus_blocks[i],
+					&a6xx_state->cx_debugbus[i]);
+
+			a6xx_state->nr_cx_debugbus =
+				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
+		}
+
+		iounmap(cxdbg);
+	}
+}
+
+/* Number of registers in the inclusive (first, last) pair at index a of reg */
+#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
+
+/*
+ * Read a data cluster from behind the AHB aperture.
+ *
+ * Builds a crashdumper script that, for each context, programs
+ * HLSQ_DBG_READ_SEL and then reads every register range of @dbgahb through
+ * the HLSQ AHB read aperture. On success @obj gets the descriptor and a copy
+ * of the dumped data; @obj is left untouched if the data would not fit in
+ * the dumper buffer or the dumper run fails.
+ */
+static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
+		const struct a6xx_dbgahb_cluster *dbgahb,
+		struct a6xx_gpu_state_obj *obj,
+		struct a6xx_crashdumper *dumper)
+{
+	u64 *in = dumper->ptr;
+	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
+	size_t datasize;
+	int i, regcount = 0;
+
+	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
+		int j;
+
+		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
+			(dbgahb->statetype + i * 2) << 8);
+
+		/* The register list is a flat array of (first, last) pairs */
+		for (j = 0; j < dbgahb->count; j += 2) {
+			int count = RANGE(dbgahb->registers, j);
+			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
+				dbgahb->registers[j] - (dbgahb->base >> 2);
+
+			in += CRASHDUMP_READ(in, offset, count, out);
+
+			out += count * sizeof(u32);
+
+			/* Every context reads the same ranges; count them once */
+			if (i == 0)
+				regcount += count;
+		}
+	}
+
+	CRASHDUMP_FINI(in);
+
+	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
+
+	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
+		return;
+
+	if (a6xx_crashdumper_run(gpu, dumper))
+		return;
+
+	obj->handle = dbgahb;
+	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
+		datasize, GFP_KERNEL);
+}
+
+/* Capture every entry of a6xx_dbgahb_clusters into @a6xx_state */
+static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state,
+		struct a6xx_crashdumper *dumper)
+{
+	struct a6xx_gpu_state_obj *objs;
+	int idx;
+
+	objs = kcalloc(ARRAY_SIZE(a6xx_dbgahb_clusters), sizeof(*objs),
+		GFP_KERNEL);
+	a6xx_state->dbgahb_clusters = objs;
+
+	if (!objs)
+		return;
+
+	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
+
+	for (idx = 0; idx < ARRAY_SIZE(a6xx_dbgahb_clusters); idx++)
+		a6xx_get_dbgahb_cluster(gpu, &a6xx_dbgahb_clusters[idx],
+			&objs[idx], dumper);
+}
+
+/*
+ * Read a data cluster from the CP aperture with the crashdumper.
+ *
+ * Builds a script that optionally programs the cluster's selector register,
+ * then for each context programs CP_APERTURE_CNTL_CD and reads every
+ * register range of @cluster. On success @obj gets the descriptor and a copy
+ * of the dumped data; @obj is left untouched if the data would not fit in
+ * the dumper buffer or the dumper run fails.
+ */
+static void a6xx_get_cluster(struct msm_gpu *gpu,
+		const struct a6xx_cluster *cluster,
+		struct a6xx_gpu_state_obj *obj,
+		struct a6xx_crashdumper *dumper)
+{
+	u64 *in = dumper->ptr;
+	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
+	size_t datasize;
+	int i, regcount = 0;
+
+	/* Some clusters need a selector register to be programmed too */
+	if (cluster->sel_reg)
+		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
+
+	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
+		int j;
+
+		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
+			(cluster->id << 8) | (i << 4) | i);
+
+		/* The register list is a flat array of (first, last) pairs */
+		for (j = 0; j < cluster->count; j += 2) {
+			int count = RANGE(cluster->registers, j);
+
+			in += CRASHDUMP_READ(in, cluster->registers[j],
+				count, out);
+
+			out += count * sizeof(u32);
+
+			/* Every context reads the same ranges; count them once */
+			if (i == 0)
+				regcount += count;
+		}
+	}
+
+	CRASHDUMP_FINI(in);
+
+	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
+
+	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
+		return;
+
+	if (a6xx_crashdumper_run(gpu, dumper))
+		return;
+
+	obj->handle = cluster;
+	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
+		datasize, GFP_KERNEL);
+}
+
+/* Capture every entry of a6xx_clusters into @a6xx_state */
+static void a6xx_get_clusters(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state,
+		struct a6xx_crashdumper *dumper)
+{
+	struct a6xx_gpu_state_obj *objs;
+	int idx;
+
+	objs = kcalloc(ARRAY_SIZE(a6xx_clusters), sizeof(*objs), GFP_KERNEL);
+	a6xx_state->clusters = objs;
+
+	if (!objs)
+		return;
+
+	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
+
+	for (idx = 0; idx < ARRAY_SIZE(a6xx_clusters); idx++)
+		a6xx_get_cluster(gpu, &a6xx_clusters[idx], &objs[idx], dumper);
+}
+
+/*
+ * Read a shader / debug block from the HLSQ aperture with the crashdumper.
+ *
+ * For each shader bank the script selects the block type and bank through
+ * HLSQ_DBG_READ_SEL and reads block->size dwords from the aperture. The
+ * banks are stored back to back in the dumper data area, which is the layout
+ * a6xx_show_shader() expects (bank i at dword offset block->size * i).
+ *
+ * On success @obj gets the descriptor and a copy of the dumped data; @obj is
+ * left untouched if the data would not fit or the dumper run fails.
+ */
+static void a6xx_get_shader_block(struct msm_gpu *gpu,
+		const struct a6xx_shader_block *block,
+		struct a6xx_gpu_state_obj *obj,
+		struct a6xx_crashdumper *dumper)
+{
+	u64 *in = dumper->ptr;
+	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
+	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
+	int i;
+
+	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
+		return;
+
+	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
+		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
+			(block->type << 8) | i);
+
+		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
+			block->size, out);
+
+		/* Advance so the next bank does not overwrite this one */
+		out += block->size * sizeof(u32);
+	}
+
+	CRASHDUMP_FINI(in);
+
+	if (a6xx_crashdumper_run(gpu, dumper))
+		return;
+
+	obj->handle = block;
+	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
+		datasize, GFP_KERNEL);
+}
+
+/* Capture every entry of a6xx_shader_blocks into @a6xx_state */
+static void a6xx_get_shaders(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state,
+		struct a6xx_crashdumper *dumper)
+{
+	struct a6xx_gpu_state_obj *objs;
+	int idx;
+
+	objs = kcalloc(ARRAY_SIZE(a6xx_shader_blocks), sizeof(*objs),
+		GFP_KERNEL);
+	a6xx_state->shaders = objs;
+
+	if (!objs)
+		return;
+
+	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
+
+	for (idx = 0; idx < ARRAY_SIZE(a6xx_shader_blocks); idx++)
+		a6xx_get_shader_block(gpu, &a6xx_shader_blocks[idx],
+			&objs[idx], dumper);
+}
+
+/*
+ * Read registers from behind the HLSQ aperture with the crashdumper.
+ *
+ * For this register list regs->val1 is the value written to
+ * HLSQ_DBG_READ_SEL and regs->val0 is a byte base that is converted to
+ * dwords and subtracted from each register offset. On success @obj gets the
+ * descriptor and a copy of the dumped data; @obj is left untouched if the
+ * data would not fit or the dumper run fails.
+ */
+static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
+		const struct a6xx_registers *regs,
+		struct a6xx_gpu_state_obj *obj,
+		struct a6xx_crashdumper *dumper)
+
+{
+	u64 *in = dumper->ptr;
+	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
+	int i, regcount = 0;
+
+	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
+
+	/* The register list is a flat array of (first, last) pairs */
+	for (i = 0; i < regs->count; i += 2) {
+		u32 count = RANGE(regs->registers, i);
+		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
+			regs->registers[i] - (regs->val0 >> 2);
+
+		in += CRASHDUMP_READ(in, offset, count, out);
+
+		out += count * sizeof(u32);
+		regcount += count;
+	}
+
+	CRASHDUMP_FINI(in);
+
+	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
+		return;
+
+	if (a6xx_crashdumper_run(gpu, dumper))
+		return;
+
+	obj->handle = regs;
+	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
+		regcount * sizeof(u32), GFP_KERNEL);
+}
+
+/*
+ * Read a block of registers using the crashdumper.
+ *
+ * For this register list a non-zero regs->val0 names a selector register
+ * that is written with regs->val1 before the reads. On success @obj gets the
+ * descriptor and a copy of the dumped data; @obj is left untouched if the
+ * data would not fit or the dumper run fails.
+ */
+static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
+		const struct a6xx_registers *regs,
+		struct a6xx_gpu_state_obj *obj,
+		struct a6xx_crashdumper *dumper)
+
+{
+	u64 *in = dumper->ptr;
+	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
+	int i, regcount = 0;
+
+	/* Some blocks might need to program a selector register first */
+	if (regs->val0)
+		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
+
+	/* The register list is a flat array of (first, last) pairs */
+	for (i = 0; i < regs->count; i += 2) {
+		u32 count = RANGE(regs->registers, i);
+
+		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
+
+		out += count * sizeof(u32);
+		regcount += count;
+	}
+
+	CRASHDUMP_FINI(in);
+
+	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
+		return;
+
+	if (a6xx_crashdumper_run(gpu, dumper))
+		return;
+
+	obj->handle = regs;
+	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
+		regcount * sizeof(u32), GFP_KERNEL);
+}
+
+/*
+ * Read a block of registers via AHB, one gpu_read() per register, into a
+ * freshly allocated buffer stored in @obj. @obj->handle is set even when the
+ * allocation fails.
+ */
+static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
+		const struct a6xx_registers *regs,
+		struct a6xx_gpu_state_obj *obj)
+{
+	int pair, total = 0;
+	u32 *dst;
+
+	/* First pass: size the buffer from the (first, last) pairs */
+	for (pair = 0; pair < regs->count; pair += 2)
+		total += RANGE(regs->registers, pair);
+
+	obj->handle = (const void *) regs;
+	obj->data = kcalloc(total, sizeof(u32), GFP_KERNEL);
+	if (!obj->data)
+		return;
+
+	/* Second pass: read each range in order */
+	dst = obj->data;
+
+	for (pair = 0; pair < regs->count; pair += 2) {
+		u32 len = RANGE(regs->registers, pair);
+		int k;
+
+		for (k = 0; k < len; k++)
+			*dst++ = gpu_read(gpu, regs->registers[pair] + k);
+	}
+}
+
+/*
+ * Read a block of GMU registers, one gmu_read() per register, into a freshly
+ * allocated buffer stored in @obj. @obj->handle is set even when the
+ * allocation fails.
+ */
+static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
+		const struct a6xx_registers *regs,
+		struct a6xx_gpu_state_obj *obj)
+{
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(to_adreno_gpu(gpu));
+	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+	int pair, total = 0;
+	u32 *dst;
+
+	/* First pass: size the buffer from the (first, last) pairs */
+	for (pair = 0; pair < regs->count; pair += 2)
+		total += RANGE(regs->registers, pair);
+
+	obj->handle = (const void *) regs;
+	obj->data = kcalloc(total, sizeof(u32), GFP_KERNEL);
+	if (!obj->data)
+		return;
+
+	/* Second pass: read each range in order */
+	dst = obj->data;
+
+	for (pair = 0; pair < regs->count; pair += 2) {
+		u32 len = RANGE(regs->registers, pair);
+		int k;
+
+		for (k = 0; k < len; k++)
+			*dst++ = gmu_read(gmu, regs->registers[pair] + k);
+	}
+}
+
+/*
+ * Capture the two GMU register lists into @a6xx_state. The first list is
+ * always read; the second is only read when the GX rail is on, after the AHB
+ * fence has been set to allow access.
+ */
+static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+	a6xx_state->gmu_registers = kcalloc(2,
+		sizeof(*a6xx_state->gmu_registers), GFP_KERNEL);
+
+	if (!a6xx_state->gmu_registers)
+		return;
+
+	/* Both slots are counted even if the second one is never filled */
+	a6xx_state->nr_gmu_registers = 2;
+
+	/* Get the CX GMU registers from AHB */
+	_a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[0],
+		&a6xx_state->gmu_registers[0]);
+
+	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
+		return;
+
+	/* Set the fence to ALLOW mode so we can access the registers */
+	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
+
+	_a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[1],
+		&a6xx_state->gmu_registers[1]);
+}
+
+/*
+ * Capture the GPU register lists into @a6xx_state->registers: first the
+ * lists read directly over AHB, then the lists read with the crashdumper,
+ * then the lists read through the HLSQ aperture.
+ */
+static void a6xx_get_registers(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state,
+		struct a6xx_crashdumper *dumper)
+{
+	int total = ARRAY_SIZE(a6xx_ahb_reglist) +
+		ARRAY_SIZE(a6xx_reglist) +
+		ARRAY_SIZE(a6xx_hlsq_reglist);
+	struct a6xx_gpu_state_obj *slot;
+	int n;
+
+	slot = kcalloc(total, sizeof(*slot), GFP_KERNEL);
+	a6xx_state->registers = slot;
+
+	if (!slot)
+		return;
+
+	a6xx_state->nr_registers = total;
+
+	for (n = 0; n < ARRAY_SIZE(a6xx_ahb_reglist); n++)
+		a6xx_get_ahb_gpu_registers(gpu, &a6xx_ahb_reglist[n], slot++);
+
+	for (n = 0; n < ARRAY_SIZE(a6xx_reglist); n++)
+		a6xx_get_crashdumper_registers(gpu, &a6xx_reglist[n], slot++,
+			dumper);
+
+	for (n = 0; n < ARRAY_SIZE(a6xx_hlsq_reglist); n++)
+		a6xx_get_crashdumper_hlsq_registers(gpu,
+			&a6xx_hlsq_reglist[n], slot++, dumper);
+}
+
+/*
+ * Read a block of data from an indexed register pair into a freshly
+ * allocated buffer stored in @obj. @obj->handle is set even when the
+ * allocation fails.
+ */
+static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
+		const struct a6xx_indexed_registers *indexed,
+		struct a6xx_gpu_state_obj *obj)
+{
+	u32 *dst;
+	int n;
+
+	obj->handle = (const void *) indexed;
+
+	dst = kcalloc(indexed->count, sizeof(u32), GFP_KERNEL);
+	obj->data = dst;
+	if (!dst)
+		return;
+
+	/* All the indexed banks start at address 0 */
+	gpu_write(gpu, indexed->addr, 0);
+
+	/* Read the data - each read increments the internal address by 1 */
+	for (n = 0; n < indexed->count; n++)
+		dst[n] = gpu_read(gpu, indexed->data);
+}
+
+/*
+ * Capture the banks of indexed registers into @a6xx_state: one object per
+ * entry of a6xx_indexed_reglist plus a final object for the CP mempool.
+ *
+ * The CP mempool size register is zeroed while the mempool is dumped and
+ * restored afterwards, whether or not the dump buffer could be allocated.
+ */
+static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
+		struct a6xx_gpu_state *a6xx_state)
+{
+	u32 mempool_size;
+	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
+	int i;
+
+	/* Size each element as an object, not as the pointer to the array */
+	a6xx_state->indexed_regs = kcalloc(count,
+		sizeof(*a6xx_state->indexed_regs), GFP_KERNEL);
+	if (!a6xx_state->indexed_regs)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
+		a6xx_get_indexed_regs(gpu, &a6xx_indexed_reglist[i],
+			&a6xx_state->indexed_regs[i]);
+
+	/* Set the CP mempool size to 0 to stabilize it while dumping */
+	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
+	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
+
+	/* Get the contents of the CP mempool */
+	a6xx_get_indexed_regs(gpu, &a6xx_cp_mempool_indexed,
+		&a6xx_state->indexed_regs[i]);
+
+	/*
+	 * Offset 0x2000 in the mempool is the size - copy the saved size over
+	 * so the data is consistent. Skip this if the dump buffer could not
+	 * be allocated or does not reach that offset.
+	 */
+	if (a6xx_state->indexed_regs[i].data &&
+			a6xx_cp_mempool_indexed.count > 0x2000)
+		a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
+
+	/* Restore the size in the hardware */
+	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
+
+	a6xx_state->nr_indexed_regs = count;
+}
+
+/*
+ * Allocate and fill an a6xx GPU state snapshot. Returns the embedded
+ * msm_gpu_state, or ERR_PTR(-ENOMEM) if the snapshot cannot be allocated.
+ * Only the generic state and the GMU registers are gathered when GX is off.
+ */
+struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
+{
+	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(to_adreno_gpu(gpu));
+	struct a6xx_crashdumper dumper = { 0 };
+	struct a6xx_gpu_state *a6xx_state;
+
+	a6xx_state = kzalloc(sizeof(*a6xx_state), GFP_KERNEL);
+	if (!a6xx_state)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get the generic state from the adreno core */
+	adreno_gpu_state_get(gpu, &a6xx_state->base);
+
+	a6xx_get_gmu_registers(gpu, a6xx_state);
+
+	/* If GX isn't on the rest of the data isn't going to be accessible */
+	if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) {
+		/* Get the banks of indexed registers */
+		a6xx_get_indexed_registers(gpu, a6xx_state);
+
+		/* Try to initialize the crashdumper */
+		if (a6xx_crashdumper_init(gpu, &dumper) == 0) {
+			a6xx_get_registers(gpu, a6xx_state, &dumper);
+			a6xx_get_shaders(gpu, a6xx_state, &dumper);
+			a6xx_get_clusters(gpu, a6xx_state, &dumper);
+			a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);
+
+			a6xx_crashdumper_free(gpu, &dumper);
+		}
+
+		a6xx_get_debugbus(gpu, a6xx_state);
+	}
+
+	return &a6xx_state->base;
+}
+
+/* Free the data of each of the @nr objects in @objs, then the array itself */
+static void a6xx_free_state_objs(struct a6xx_gpu_state_obj *objs, int nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++)
+		kfree(objs[i].data);
+
+	kfree(objs);
+}
+
+/*
+ * kref release callback for an a6xx GPU state: frees every captured object,
+ * releases the generic state that adreno_gpu_state_get() filled into the
+ * base structure, and finally frees the snapshot itself.
+ */
+void a6xx_gpu_state_destroy(struct kref *kref)
+{
+	struct msm_gpu_state *state = container_of(kref,
+			struct msm_gpu_state, ref);
+	struct a6xx_gpu_state *a6xx_state = container_of(state,
+			struct a6xx_gpu_state, base);
+
+	a6xx_free_state_objs(a6xx_state->gmu_registers,
+		a6xx_state->nr_gmu_registers);
+	a6xx_free_state_objs(a6xx_state->registers,
+		a6xx_state->nr_registers);
+	a6xx_free_state_objs(a6xx_state->shaders,
+		a6xx_state->nr_shaders);
+	a6xx_free_state_objs(a6xx_state->clusters,
+		a6xx_state->nr_clusters);
+	a6xx_free_state_objs(a6xx_state->dbgahb_clusters,
+		a6xx_state->nr_dbgahb_clusters);
+	a6xx_free_state_objs(a6xx_state->indexed_regs,
+		a6xx_state->nr_indexed_regs);
+	a6xx_free_state_objs(a6xx_state->debugbus,
+		a6xx_state->nr_debugbus);
+
+	/* The VBIF debug bus is a single object with no count */
+	if (a6xx_state->vbif_debugbus)
+		kfree(a6xx_state->vbif_debugbus->data);
+
+	kfree(a6xx_state->vbif_debugbus);
+
+	a6xx_free_state_objs(a6xx_state->cx_debugbus,
+		a6xx_state->nr_cx_debugbus);
+
+	/* Release what the adreno core gathered so it is not leaked */
+	adreno_gpu_state_destroy(state);
+
+	kfree(a6xx_state);
+}
+
+/*
+ * Drop a reference on @state, destroying it when the last one goes away.
+ * Returns 1 for a NULL or error pointer, otherwise the kref_put() result.
+ */
+int a6xx_gpu_state_put(struct msm_gpu_state *state)
+{
+	if (!IS_ERR_OR_NULL(state))
+		return kref_put(&state->ref, a6xx_gpu_state_destroy);
+
+	return 1;
+}
+
+/*
+ * Print one "offset / value" line per captured register. @registers is a
+ * flat array of (first, last) pairs with @count entries and @data holds the
+ * values in the same order; entries equal to 0xdeafbead are skipped.
+ */
+static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
+		struct drm_printer *p)
+{
+	const u32 *value = data;
+	int pair;
+
+	if (!data)
+		return;
+
+	for (pair = 0; pair < count; pair += 2) {
+		u32 len = RANGE(registers, pair);
+		u32 reg = registers[pair];
+		u32 n;
+
+		for (n = 0; n < len; n++, reg++, value++) {
+			if (*value == 0xdeafbead)
+				continue;
+
+			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
+				reg << 2, *value);
+		}
+	}
+}
+
+/*
+ * Print @len bytes of @data as an ascii85 block, dropping trailing dwords
+ * that are zero. Nothing is printed when every dword is zero.
+ */
+static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
+{
+	char out[ASCII85_BUFSZ];
+	long i, l, datalen = 0;
+
+	/* Remember the position of the last non-zero dword */
+	for (i = 0; i < len >> 2; i++) {
+		if (data[i])
+			datalen = (i << 2) + 1;
+	}
+
+	if (datalen == 0)
+		return;
+
+	drm_puts(p, "    data: !!ascii85 |\n");
+	drm_puts(p, "      ");
+
+
+	l = ascii85_encode_len(datalen);
+
+	/* Encode and print one dword at a time */
+	for (i = 0; i < l; i++)
+		drm_puts(p, ascii85_encode(data[i], out));
+
+	drm_puts(p, "\n");
+}
+
+/*
+ * Print a label followed by a name and a newline.
+ *
+ * Despite its name, @fmt is handed to drm_puts() just like @name is; it is
+ * not passed to drm_printf() here.
+ */
+static void print_name(struct drm_printer *p, const char *fmt, const char *name)
+{
+	drm_puts(p, fmt);
+	drm_puts(p, name);
+	drm_puts(p, "\n");
+}
+
+/*
+ * Print one captured shader block: its type name, then a "bank"/"size" entry
+ * for each of the A6XX_NUM_SHADER_BANKS banks. The bank contents are only
+ * dumped when data was captured; block->size << 2 is passed as the byte
+ * length of each bank.
+ */
+static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
+		struct drm_printer *p)
+{
+	const struct a6xx_shader_block *block = obj->handle;
+	int bank;
+
+	if (!block)
+		return;
+
+	print_name(p, "  - type: ", block->name);
+
+	for (bank = 0; bank < A6XX_NUM_SHADER_BANKS; bank++) {
+		drm_printf(p, "    - bank: %d\n", bank);
+		drm_printf(p, "      size: %d\n", block->size);
+
+		/* The headers are still printed when there is no data */
+		if (obj->data)
+			print_ascii85(p, block->size << 2,
+				obj->data + (block->size * bank));
+	}
+}
+
+/*
+ * Print the registers captured for a cluster, once per context.
+ *
+ * @registers: table of register ranges, consumed two entries at a time
+ * @size: number of entries in @registers
+ * @data: captured values; the values for context 0 come first, followed by
+ *	those for the next context, each laid out in table order. May be
+ *	NULL, in which case nothing is printed.
+ * @p: printer to emit to
+ */
+static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
+		struct drm_printer *p)
+{
+	int ctx, index = 0;
+
+	/* Both callers pass obj->data through without checking it */
+	if (!data)
+		return;
+
+	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
+		int j;
+
+		drm_printf(p, "    - context: %d\n", ctx);
+
+		for (j = 0; j < size; j += 2) {
+			u32 nregs = RANGE(registers, j);
+			u32 offset = registers[j];
+			u32 k;
+
+			/* index keeps running across ranges and contexts */
+			for (k = 0; k < nregs; index++, offset++, k++) {
+				/* Entries holding 0xdeafbead are skipped */
+				if (data[index] == 0xdeafbead)
+					continue;
+
+				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
+					offset << 2, data[index]);
+			}
+		}
+	}
+}
+
+/*
+ * Print one captured debug AHB cluster: its name followed by the per-context
+ * register values. Objects without a handle are silently skipped.
+ */
+static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
+		struct drm_printer *p)
+{
+	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
+
+	if (!dbgahb)
+		return;
+
+	print_name(p, "  - cluster-name: ", dbgahb->name);
+	a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, obj->data, p);
+}
+
+/*
+ * Print one captured context cluster: its name followed by the per-context
+ * register values. Objects without a handle are silently skipped.
+ */
+static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
+		struct drm_printer *p)
+{
+	const struct a6xx_cluster *cluster = obj->handle;
+
+	if (!cluster)
+		return;
+
+	print_name(p, "  - cluster-name: ", cluster->name);
+	a6xx_show_cluster_data(cluster->registers, cluster->count, obj->data, p);
+}
+
+/*
+ * Print one captured set of indexed registers: its name, the number of
+ * dwords and the captured contents. Objects without a handle are skipped.
+ */
+static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
+		struct drm_printer *p)
+{
+	const struct a6xx_indexed_registers *indexed = obj->handle;
+	u32 dwords;
+
+	if (!indexed)
+		return;
+
+	dwords = indexed->count;
+
+	print_name(p, "  - regs-name: ", indexed->name);
+	drm_printf(p, "    dwords: %d\n", dwords);
+
+	/* print_ascii85() takes the length in bytes */
+	print_ascii85(p, dwords << 2, obj->data);
+}
+
+/*
+ * Print one regular (non-VBIF) debugbus block: its name, its size and the
+ * captured contents. A NULL @block is silently skipped.
+ */
+static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
+		u32 *data, struct drm_printer *p)
+{
+	if (!block)
+		return;
+
+	print_name(p, "  - debugbus-block: ", block->name);
+
+	/*
+	 * block->count is in quadwords for the regular debugbus, but the
+	 * size is reported in dwords for consistency
+	 */
+	drm_printf(p, "    count: %d\n", block->count << 1);
+
+	/* Eight bytes per quadword for the dump itself */
+	print_ascii85(p, block->count << 3, data);
+}
+
+/*
+ * Print every captured debugbus block: the regular blocks first, then the
+ * VBIF block (if there is one) and finally the CX blocks.
+ */
+static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
+		struct drm_printer *p)
+{
+	struct a6xx_gpu_state_obj *obj;
+	int i;
+
+	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
+		obj = &a6xx_state->debugbus[i];
+		a6xx_show_debugbus_block(obj->handle, obj->data, p);
+	}
+
+	obj = a6xx_state->vbif_debugbus;
+	if (obj) {
+		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
+		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
+
+		/* Unlike the other blocks, the VBIF size is already in dwords */
+		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
+	}
+
+	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
+		obj = &a6xx_state->cx_debugbus[i];
+		a6xx_show_debugbus_block(obj->handle, obj->data, p);
+	}
+}
+
+/*
+ * Print a captured a6xx GPU state.
+ *
+ * The common adreno state is printed first via adreno_show(), followed by the
+ * a6xx specific sections in this order: registers, GMU registers, indexed
+ * registers, shader blocks, clusters (context clusters, then debug AHB
+ * clusters under the same heading) and the debugbus.
+ *
+ * A NULL or error @state prints nothing.
+ */
+void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+		struct drm_printer *p)
+{
+	struct a6xx_gpu_state *a6xx_state;
+	int i;
+
+	if (IS_ERR_OR_NULL(state))
+		return;
+
+	a6xx_state = container_of(state, struct a6xx_gpu_state, base);
+
+	adreno_show(gpu, state, p);
+
+	drm_puts(p, "registers:\n");
+	for (i = 0; i < a6xx_state->nr_registers; i++) {
+		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
+		const struct a6xx_registers *regs = obj->handle;
+
+		/* Objects without a handle have nothing to show */
+		if (regs)
+			a6xx_show_registers(regs->registers, obj->data,
+				regs->count, p);
+	}
+
+	drm_puts(p, "registers-gmu:\n");
+	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
+		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
+		const struct a6xx_registers *regs = obj->handle;
+
+		if (regs)
+			a6xx_show_registers(regs->registers, obj->data,
+				regs->count, p);
+	}
+
+	drm_puts(p, "indexed-registers:\n");
+	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
+		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
+
+	drm_puts(p, "shader-blocks:\n");
+	for (i = 0; i < a6xx_state->nr_shaders; i++)
+		a6xx_show_shader(&a6xx_state->shaders[i], p);
+
+	/* Both kinds of cluster are listed under the one heading */
+	drm_puts(p, "clusters:\n");
+	for (i = 0; i < a6xx_state->nr_clusters; i++)
+		a6xx_show_cluster(&a6xx_state->clusters[i], p);
+
+	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
+		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
+
+	drm_puts(p, "debugbus:\n");
+	a6xx_show_debugbus(a6xx_state, p);
+}
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
new file mode 100644
index 000000000000..68cccfa2870a
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+
+#ifndef _A6XX_CRASH_DUMP_H_
+#define _A6XX_CRASH_DUMP_H_
+
+#include "a6xx.xml.h"
+
+#define A6XX_NUM_CONTEXTS 2
+#define A6XX_NUM_SHADER_BANKS 3
+
+static const u32 a6xx_gras_cluster[] = {
+	0x8000, 0x8006, 0x8010, 0x8092, 0x8094, 0x809d, 0x80a0, 0x80a6,
+	0x80af, 0x80f1, 0x8100, 0x8107, 0x8109, 0x8109, 0x8110, 0x8110,
+	0x8400, 0x840b,
+};
+
+static const u32 a6xx_ps_cluster_rac[] = {
+	0x8800, 0x8806, 0x8809, 0x8811, 0x8818, 0x881e, 0x8820, 0x8865,
+	0x8870, 0x8879, 0x8880, 0x8889, 0x8890, 0x8891, 0x8898, 0x8898,
+	0x88c0, 0x88c1, 0x88d0, 0x88e3, 0x8900, 0x890c, 0x890f, 0x891a,
+	0x8c00, 0x8c01, 0x8c08, 0x8c10, 0x8c17, 0x8c1f, 0x8c26, 0x8c33,
+};
+
+static const u32 a6xx_ps_cluster_rbp[] = {
+	0x88f0, 0x88f3, 0x890d, 0x890e, 0x8927, 0x8928, 0x8bf0, 0x8bf1,
+	0x8c02, 0x8c07, 0x8c11, 0x8c16, 0x8c20, 0x8c25,
+};
+
+static const u32 a6xx_ps_cluster[] = {
+	0x9200, 0x9216, 0x9218, 0x9236, 0x9300, 0x9306,
+};
+
+static const u32 a6xx_fe_cluster[] = {
+	0x9300, 0x9306, 0x9800, 0x9806, 0x9b00, 0x9b07, 0xa000, 0xa009,
+	0xa00e, 0xa0ef, 0xa0f8, 0xa0f8,
+};
+
+static const u32 a6xx_pc_vs_cluster[] = {
+	0x9100, 0x9108, 0x9300, 0x9306, 0x9980, 0x9981, 0x9b00, 0x9b07,
+};
+
+#define CLUSTER_FE    0
+#define CLUSTER_SP_VS 1
+#define CLUSTER_PC_VS 2
+#define CLUSTER_GRAS  3
+#define CLUSTER_SP_PS 4
+#define CLUSTER_PS    5
+
+/*
+ * Build one a6xx_clusters[] entry. The # operator does not macro-expand its
+ * operand, so .name is the spelling of the CLUSTER_* identifier while .id is
+ * its numeric value.
+ */
+#define CLUSTER(_id, _reg, _sel_reg, _sel_val) \
+	{ .id = _id, .name = #_id,\
+		.registers = _reg, \
+		.count = ARRAY_SIZE(_reg), \
+		.sel_reg = _sel_reg, .sel_val = _sel_val }
+
+/*
+ * Context clusters to capture: a cluster id, the name printed in the dump and
+ * a table of register ranges with its number of entries.
+ *
+ * NOTE(review): sel_reg/sel_val look like a select register and the value to
+ * program into it before reading the ranges (0 meaning none) - confirm
+ * against the capture code.
+ */
+static const struct a6xx_cluster {
+	u32 id;
+	const char *name;
+	const u32 *registers;
+	size_t count;
+	u32 sel_reg;
+	u32 sel_val;
+} a6xx_clusters[] = {
+	CLUSTER(CLUSTER_GRAS, a6xx_gras_cluster, 0, 0),
+	CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rac, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x0),
+	CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rbp, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x9),
+	CLUSTER(CLUSTER_PS, a6xx_ps_cluster, 0, 0),
+	CLUSTER(CLUSTER_FE, a6xx_fe_cluster, 0, 0),
+	CLUSTER(CLUSTER_PC_VS, a6xx_pc_vs_cluster, 0, 0),
+};
+
+static const u32 a6xx_sp_vs_hlsq_cluster[] = {
+	0xb800, 0xb803, 0xb820, 0xb822,
+};
+
+static const u32 a6xx_sp_vs_sp_cluster[] = {
+	0xa800, 0xa824, 0xa830, 0xa83c, 0xa840, 0xa864, 0xa870, 0xa895,
+	0xa8a0, 0xa8af, 0xa8c0, 0xa8c3,
+};
+
+static const u32 a6xx_hlsq_duplicate_cluster[] = {
+	0xbb10, 0xbb11, 0xbb20, 0xbb29,
+};
+
+static const u32 a6xx_hlsq_2d_duplicate_cluster[] = {
+	0xbd80, 0xbd80,
+};
+
+static const u32 a6xx_sp_duplicate_cluster[] = {
+	0xab00, 0xab00, 0xab04, 0xab05, 0xab10, 0xab1b, 0xab20, 0xab20,
+};
+
+static const u32 a6xx_tp_duplicate_cluster[] = {
+	0xb300, 0xb307, 0xb309, 0xb309, 0xb380, 0xb382,
+};
+
+static const u32 a6xx_sp_ps_hlsq_cluster[] = {
+	0xb980, 0xb980, 0xb982, 0xb987, 0xb990, 0xb99b, 0xb9a0, 0xb9a2,
+	0xb9c0, 0xb9c9,
+};
+
+static const u32 a6xx_sp_ps_hlsq_2d_cluster[] = {
+	0xbd80, 0xbd80,
+};
+
+static const u32 a6xx_sp_ps_sp_cluster[] = {
+	0xa980, 0xa9a8, 0xa9b0, 0xa9bc, 0xa9d0, 0xa9d3, 0xa9e0, 0xa9f3,
+	0xaa00, 0xaa00, 0xaa30, 0xaa31,
+};
+
+static const u32 a6xx_sp_ps_sp_2d_cluster[] = {
+	0xacc0, 0xacc0,
+};
+
+static const u32 a6xx_sp_ps_tp_cluster[] = {
+	0xb180, 0xb183, 0xb190, 0xb191,
+};
+
+static const u32 a6xx_sp_ps_tp_2d_cluster[] = {
+	0xb4c0, 0xb4d1,
+};
+
+/*
+ * Build one a6xx_dbgahb_clusters[] entry. Note the argument order: the base
+ * comes before the state type. .name is the spelling of the CLUSTER_*
+ * identifier; the numeric id itself is not stored.
+ */
+#define CLUSTER_DBGAHB(_id, _base, _type, _reg) \
+	{ .name = #_id, .statetype = _type, .base = _base, \
+		.registers = _reg, .count = ARRAY_SIZE(_reg) }
+
+/*
+ * Debug AHB clusters to capture: the name printed in the dump, a state type,
+ * a base and a table of register ranges with its number of entries. Several
+ * entries share the same name.
+ */
+static const struct a6xx_dbgahb_cluster {
+	const char *name;
+	u32 statetype;
+	u32 base;
+	const u32 *registers;
+	size_t count;
+} a6xx_dbgahb_clusters[] = {
+	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_sp_vs_hlsq_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_vs_sp_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_hlsq_duplicate_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002f000, 0x45, a6xx_hlsq_2d_duplicate_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_duplicate_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002c000, 0x1, a6xx_tp_duplicate_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_sp_ps_hlsq_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002f000, 0x46, a6xx_sp_ps_hlsq_2d_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_ps_sp_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002b000, 0x26, a6xx_sp_ps_sp_2d_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_sp_ps_tp_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002d000, 0x6, a6xx_sp_ps_tp_2d_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_hlsq_duplicate_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_duplicate_cluster),
+	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_tp_duplicate_cluster),
+};
+
+static const u32 a6xx_hlsq_registers[] = {
+	0xbe00, 0xbe01, 0xbe04, 0xbe05, 0xbe08, 0xbe09, 0xbe10, 0xbe15,
+	0xbe20, 0xbe23,
+};
+
+static const u32 a6xx_sp_registers[] = {
+	0xae00, 0xae04, 0xae0c, 0xae0c, 0xae0f, 0xae2b, 0xae30, 0xae32,
+	0xae35, 0xae35, 0xae3a, 0xae3f, 0xae50, 0xae52,
+};
+
+static const u32 a6xx_tp_registers[] = {
+	0xb600, 0xb601, 0xb604, 0xb605, 0xb610, 0xb61b, 0xb620, 0xb623,
+};
+
+/*
+ * A table of register ranges plus two values whose meaning depends on which
+ * macro built the entry: HLSQ_DBG_REGS() stores a base in val0 and a type in
+ * val1, while REGS() stores a select register in val0 and a select value in
+ * val1.
+ */
+struct a6xx_registers {
+	const u32 *registers;
+	size_t count;
+	u32 val0;
+	u32 val1;
+};
+
+/* Build one a6xx_hlsq_reglist[] entry: val0 = base, val1 = type */
+#define HLSQ_DBG_REGS(_base, _type, _array) \
+	{ .val0 = _base, .val1 = _type, .registers = _array, \
+		.count = ARRAY_SIZE(_array), }
+
+static const struct a6xx_registers a6xx_hlsq_reglist[] = {
+	HLSQ_DBG_REGS(0x0002F800, 0x40, a6xx_hlsq_registers),
+	HLSQ_DBG_REGS(0x0002B800, 0x20, a6xx_sp_registers),
+	HLSQ_DBG_REGS(0x0002D800, 0x0, a6xx_tp_registers),
+};
+
+/* Build one a6xx_shader_blocks[] entry; .name is the spelling of _type */
+#define SHADER(_type, _size) \
+	{ .type = _type, .name = #_type, .size = _size }
+
+/*
+ * Shader blocks to capture: the name printed in the dump, the block type and
+ * the size of one bank. The show code passes size << 2 as the byte length of
+ * a bank, i.e. it treats size as a count of dwords.
+ */
+static const struct a6xx_shader_block {
+	const char *name;
+	u32 type;
+	u32 size;
+} a6xx_shader_blocks[] = {
+	SHADER(A6XX_TP0_TMO_DATA, 0x200),
+	SHADER(A6XX_TP0_SMO_DATA, 0x80),
+	SHADER(A6XX_TP0_MIPMAP_BASE_DATA, 0x3c0),
+	SHADER(A6XX_TP1_TMO_DATA, 0x200),
+	SHADER(A6XX_TP1_SMO_DATA, 0x80),
+	SHADER(A6XX_TP1_MIPMAP_BASE_DATA, 0x3c0),
+	SHADER(A6XX_SP_INST_DATA, 0x800),
+	SHADER(A6XX_SP_LB_0_DATA, 0x800),
+	SHADER(A6XX_SP_LB_1_DATA, 0x800),
+	SHADER(A6XX_SP_LB_2_DATA, 0x800),
+	SHADER(A6XX_SP_LB_3_DATA, 0x800),
+	SHADER(A6XX_SP_LB_4_DATA, 0x800),
+	SHADER(A6XX_SP_LB_5_DATA, 0x200),
+	SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000),
+	SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280),
+	SHADER(A6XX_SP_UAV_DATA, 0x80),
+	SHADER(A6XX_SP_INST_TAG, 0x80),
+	SHADER(A6XX_SP_CB_BINDLESS_TAG, 0x80),
+	SHADER(A6XX_SP_TMO_UMO_TAG, 0x80),
+	SHADER(A6XX_SP_SMO_TAG, 0x80),
+	SHADER(A6XX_SP_STATE_DATA, 0x3f),
+	SHADER(A6XX_HLSQ_CHUNK_CVS_RAM, 0x1c0),
+	SHADER(A6XX_HLSQ_CHUNK_CPS_RAM, 0x280),
+	SHADER(A6XX_HLSQ_CHUNK_CVS_RAM_TAG, 0x40),
+	SHADER(A6XX_HLSQ_CHUNK_CPS_RAM_TAG, 0x40),
+	SHADER(A6XX_HLSQ_ICB_CVS_CB_BASE_TAG, 0x4),
+	SHADER(A6XX_HLSQ_ICB_CPS_CB_BASE_TAG, 0x4),
+	SHADER(A6XX_HLSQ_CVS_MISC_RAM, 0x1c0),
+	SHADER(A6XX_HLSQ_CPS_MISC_RAM, 0x580),
+	SHADER(A6XX_HLSQ_INST_RAM, 0x800),
+	SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM, 0x800),
+	SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM, 0x800),
+	SHADER(A6XX_HLSQ_CVS_MISC_RAM_TAG, 0x8),
+	SHADER(A6XX_HLSQ_CPS_MISC_RAM_TAG, 0x4),
+	SHADER(A6XX_HLSQ_INST_RAM_TAG, 0x80),
+	SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG, 0xc),
+	SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG, 0x10),
+	SHADER(A6XX_HLSQ_PWR_REST_RAM, 0x28),
+	SHADER(A6XX_HLSQ_PWR_REST_TAG, 0x14),
+	SHADER(A6XX_HLSQ_DATAPATH_META, 0x40),
+	SHADER(A6XX_HLSQ_FRONTEND_META, 0x40),
+	SHADER(A6XX_HLSQ_INDIRECT_META, 0x40),
+};
+
+static const u32 a6xx_rb_rac_registers[] = {
+	0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e10, 0x8e1c, 0x8e20, 0x8e25,
+	0x8e28, 0x8e28, 0x8e2c, 0x8e2f, 0x8e50, 0x8e52,
+};
+
+static const u32 a6xx_rb_rbp_registers[] = {
+	0x8e01, 0x8e01, 0x8e0c, 0x8e0c, 0x8e3b, 0x8e3e, 0x8e40, 0x8e43,
+	0x8e53, 0x8e5f, 0x8e70, 0x8e77,
+};
+
+static const u32 a6xx_registers[] = {
+	/* RBBM */
+	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b,
+	0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044,
+	0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb,
+	0x0100, 0x011d, 0x0200, 0x020d, 0x0218, 0x023d, 0x0400, 0x04f9,
+	0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, 0x0533, 0x0533,
+	0x0540, 0x0555,
+	/* CP */
+	0x0800, 0x0808, 0x0810, 0x0813, 0x0820, 0x0821, 0x0823, 0x0824,
+	0x0826, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, 0x084f, 0x086f,
+	0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, 0x08d0, 0x08dd,
+	0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911, 0x0928, 0x093e,
+	0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, 0x0998, 0x099e,
+	0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, 0x09c2, 0x09c8,
+	0x0a00, 0x0a03,
+	/* VSC */
+	0x0c00, 0x0c04, 0x0c06, 0x0c06, 0x0c10, 0x0cd9, 0x0e00, 0x0e0e,
+	/* UCHE */
+	0x0e10, 0x0e13, 0x0e17, 0x0e19, 0x0e1c, 0x0e2b, 0x0e30, 0x0e32,
+	0x0e38, 0x0e39,
+	/* GRAS */
+	0x8600, 0x8601, 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b,
+	0x8630, 0x8637,
+	/* VPC */
+	0x9600, 0x9604, 0x9624, 0x9637,
+	/* PC */
+	0x9e00, 0x9e01, 0x9e03, 0x9e0e, 0x9e11, 0x9e16, 0x9e19, 0x9e19,
+	0x9e1c, 0x9e1c, 0x9e20, 0x9e23, 0x9e30, 0x9e31, 0x9e34, 0x9e34,
+	0x9e70, 0x9e72, 0x9e78, 0x9e79, 0x9e80, 0x9fff,
+	/* VFD */
+	0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, 0xa610, 0xa617,
+	0xa630, 0xa630,
+};
+
+/*
+ * Build a struct a6xx_registers entry from a range table: val0 holds the
+ * select register and val1 the select value.
+ */
+#define REGS(_array, _sel_reg, _sel_val) \
+	{ .registers = _array, .count = ARRAY_SIZE(_array), \
+		.val0 = _sel_reg, .val1 = _sel_val }
+
+/*
+ * GPU register lists to capture. The two RB lists share the same select
+ * register and differ only in the select value.
+ */
+static const struct a6xx_registers a6xx_reglist[] = {
+	REGS(a6xx_registers, 0, 0),
+	REGS(a6xx_rb_rac_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0),
+	REGS(a6xx_rb_rbp_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 9),
+};
+
+static const u32 a6xx_ahb_registers[] = {
+	/* RBBM_STATUS - RBBM_STATUS3 */
+	0x210, 0x213,
+	/* CP_STATUS_1 */
+	0x825, 0x825,
+};
+
+static const u32 a6xx_vbif_registers[] = {
+	0x3000, 0x3007, 0x300c, 0x3014, 0x3018, 0x302d, 0x3030, 0x3031,
+	0x3034, 0x3036, 0x303c, 0x303d, 0x3040, 0x3040, 0x3042, 0x3042,
+	0x3049, 0x3049, 0x3058, 0x3058, 0x305a, 0x3061, 0x3064, 0x3068,
+	0x306c, 0x306d, 0x3080, 0x3088, 0x308b, 0x308c, 0x3090, 0x3094,
+	0x3098, 0x3098, 0x309c, 0x309c, 0x30c0, 0x30c0, 0x30c8, 0x30c8,
+	0x30d0, 0x30d0, 0x30d8, 0x30d8, 0x30e0, 0x30e0, 0x3100, 0x3100,
+	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
+	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x3154, 0x3154,
+	0x3156, 0x3156, 0x3158, 0x3158, 0x315a, 0x315a, 0x315c, 0x315c,
+	0x315e, 0x315e, 0x3160, 0x3160, 0x3162, 0x3162, 0x340c, 0x340c,
+	0x3410, 0x3410, 0x3800, 0x3801,
+};
+
+static const struct a6xx_registers a6xx_ahb_reglist[] = {
+	REGS(a6xx_ahb_registers, 0, 0),
+	REGS(a6xx_vbif_registers, 0, 0),
+};
+
+static const u32 a6xx_gmu_gx_registers[] = {
+	/* GMU GX */
+	0x0000, 0x0000, 0x0010, 0x0013, 0x0016, 0x0016, 0x0018, 0x001b,
+	0x001e, 0x001e, 0x0020, 0x0023, 0x0026, 0x0026, 0x0028, 0x002b,
+	0x002e, 0x002e, 0x0030, 0x0033, 0x0036, 0x0036, 0x0038, 0x003b,
+	0x003e, 0x003e, 0x0040, 0x0043, 0x0046, 0x0046, 0x0080, 0x0084,
+	0x0100, 0x012b, 0x0140, 0x0140,
+};
+
+static const u32 a6xx_gmu_cx_registers[] = {
+	/* GMU CX */
+	0x4c00, 0x4c07, 0x4c10, 0x4c12, 0x4d00, 0x4d00, 0x4d07, 0x4d0a,
+	0x5000, 0x5004, 0x5007, 0x5008, 0x500b, 0x500c, 0x500f, 0x501c,
+	0x5024, 0x502a, 0x502d, 0x5030, 0x5040, 0x5053, 0x5087, 0x5089,
+	0x50a0, 0x50a2, 0x50a4, 0x50af, 0x50c0, 0x50c3, 0x50d0, 0x50d0,
+	0x50e4, 0x50e4, 0x50e8, 0x50ec, 0x5100, 0x5103, 0x5140, 0x5140,
+	0x5142, 0x5144, 0x514c, 0x514d, 0x514f, 0x5151, 0x5154, 0x5154,
+	0x5157, 0x5158, 0x515d, 0x515d, 0x5162, 0x5162, 0x5164, 0x5165,
+	0x5180, 0x5186, 0x5190, 0x519e, 0x51c0, 0x51c0, 0x51c5, 0x51cc,
+	0x51e0, 0x51e2, 0x51f0, 0x51f0, 0x5200, 0x5201,
+	/* GPU RSCC */
+	0x8c8c, 0x8c8c, 0x8d01, 0x8d02, 0x8f40, 0x8f42, 0x8f44, 0x8f47,
+	0x8f4c, 0x8f87, 0x8fec, 0x8fef, 0x8ff4, 0x902f, 0x9094, 0x9097,
+	0x909c, 0x90d7, 0x913c, 0x913f, 0x9144, 0x917f,
+	/* GMU AO */
+	0x9300, 0x9316, 0x9400, 0x9400,
+	/* GPU CC */
+	0x9800, 0x9812, 0x9840, 0x9852, 0x9c00, 0x9c04, 0x9c07, 0x9c0b,
+	0x9c15, 0x9c1c, 0x9c1e, 0x9c2d, 0x9c3c, 0x9c3d, 0x9c3f, 0x9c40,
+	0x9c42, 0x9c49, 0x9c58, 0x9c5a, 0x9d40, 0x9d5e, 0xa000, 0xa002,
+	0xa400, 0xa402, 0xac00, 0xac02, 0xb000, 0xb002, 0xb400, 0xb402,
+	0xb800, 0xb802,
+	/* GPU CC ACD */
+	0xbc00, 0xbc16, 0xbc20, 0xbc27,
+};
+
+static const struct a6xx_registers a6xx_gmu_reglist[] = {
+	REGS(a6xx_gmu_cx_registers, 0, 0),
+	REGS(a6xx_gmu_gx_registers, 0, 0),
+};
+
+/*
+ * Indexed register banks to capture: the name printed in the dump, an
+ * address register, a data register and the number of dwords (the show code
+ * prints count as "dwords").
+ */
+static const struct a6xx_indexed_registers {
+	const char *name;
+	u32 addr;
+	u32 data;
+	u32 count;
+} a6xx_indexed_reglist[] = {
+	/* Named after the CP_SQE_STAT registers it is read through */
+	{ "CP_SQE_STAT", REG_A6XX_CP_SQE_STAT_ADDR,
+		REG_A6XX_CP_SQE_STAT_DATA, 0x33 },
+	{ "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR,
+		REG_A6XX_CP_DRAW_STATE_DATA, 0x100 },
+	{ "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR,
+		REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x6000 },
+	{ "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR,
+		REG_A6XX_CP_ROQ_DBG_DATA, 0x400 },
+};
+
+/*
+ * The CP memory pool indexed registers, kept as a separate entry rather than
+ * as part of a6xx_indexed_reglist[].
+ */
+static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = {
+	"CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR,
+		REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060,
+};
+
+/* Build one debugbus block entry; .name is the spelling of _id */
+#define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count }
+
+/*
+ * Debugbus blocks to capture: the name printed in the dump, the block id and
+ * the block size. The show code treats count as a number of quadwords.
+ */
+static const struct a6xx_debugbus_block {
+	const char *name;
+	u32 id;
+	u32 count;
+} a6xx_debugbus_blocks[] = {
+	DEBUGBUS(A6XX_DBGBUS_CP, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_RBBM, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_HLSQ, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_UCHE, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_DPM, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TESS, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_PC, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VFDP, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VPC, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TSE, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_RAS, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VSC, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_COM, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_LRZ, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_A2D, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_CCUFCHE, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_RBP, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_DCS, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_DBGC, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_GMU_GX, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TPFCHE, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_GPC, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_LARC, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_HLSQ_SPTP, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_RB_0, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_RB_1, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_UCHE_WRAPPER, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_CCU_0, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_CCU_1, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VFD_0, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VFD_1, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VFD_2, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_VFD_3, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_SP_0, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_SP_1, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TPL1_0, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TPL1_1, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TPL1_2, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_TPL1_3, 0x100),
+};
+
+/*
+ * Debugbus blocks kept in their own table; the show code prints them from
+ * the separate cx_debugbus list, after the regular and VBIF blocks.
+ */
+static const struct a6xx_debugbus_block a6xx_cx_debugbus_blocks[] = {
+	DEBUGBUS(A6XX_DBGBUS_GMU_CX, 0x100),
+	DEBUGBUS(A6XX_DBGBUS_CX, 0x100),
+};
+
+#endif
-- 
2.18.0