[RFC PATCH 12/12] drm/i915: Actually write the AUB file

Oscar Mateo <oscar.mateo@xxxxxxxxx> · Fri, 27 Oct 2017 11:01:15 -0700

Use all the information previously recorded in the GPU error
state to write an AUB file.

TODO: output an already compressed file?

Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/Makefile        |   2 +-
 drivers/gpu/drm/i915/i915_aubcrash.c | 174 +++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 04956c7..f958291 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -124,7 +124,7 @@ i915-y += dvo_ch7017.o \
 
 # Post-mortem debug and GPU hang state capture
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
-i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubcrash.o
+i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubmemtrace.o i915_aubcrash.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o
diff --git a/drivers/gpu/drm/i915/i915_aubcrash.c b/drivers/gpu/drm/i915/i915_aubcrash.c
index ebdbf09..a336777 100644
--- a/drivers/gpu/drm/i915/i915_aubcrash.c
+++ b/drivers/gpu/drm/i915/i915_aubcrash.c
@@ -27,6 +27,7 @@
 
 #include "intel_drv.h"
 #include "i915_aubcrash.h"
+#include "i915_aubmemtrace.h"
 
 /**
  * DOC: AubCrash
@@ -255,8 +256,181 @@ void i915_error_page_walk(struct i915_address_space *vm,
 	}
 }
 
+#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
+
+/* AUB output hook used with CONFIG_DRM_I915_COMPRESS_ERROR enabled. */
+void write_aub(void *priv, const void *data, size_t len)
+{
+	struct drm_i915_error_state_buf *buf = priv;
+
+	i915_error_binary_write(buf, data, len); /* TODO: compress on the fly */
+}
+
+#else
+
+/* AUB output hook used without CONFIG_DRM_I915_COMPRESS_ERROR. */
+void write_aub(void *priv, const void *data, size_t len)
+{
+	/* @priv is the drm_i915_error_state_buf that receives @data */
+	i915_error_binary_write(priv, data, len);
+}
+
+#endif
+
+/* Adds a "name (gtt_offset gtt_size)" comment; needs 'aub' in scope. */
+#define AUB_COMMENT_ERROR_OBJ(title, eo) do { \
+	i915_aub_comment(aub, title " (%08x_%08x %8u)", \
+			 upper_32_bits((eo)->gtt_offset), \
+			 lower_32_bits((eo)->gtt_offset), (eo)->gtt_size); \
+} while (0)
+
 int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
 			    const struct i915_gpu_state *error)
 {
+	struct drm_i915_private *dev_priv = m->i915;
+	struct intel_aub *aub;
+	int i;
+
+	aub = i915_aub_start(dev_priv, write_aub, (void *)m, "AubCrash", true);
+	if (IS_ERR(aub))
+		return PTR_ERR(aub);
+
+	if (!error) {
+		i915_aub_comment(aub, "No error state collected\n");
+		return 0;
+	}
+
+	i915_aub_comment(aub, "Registers");
+	i915_aub_register(aub, GAM_ECOCHK, error->gam_ecochk);
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+		struct intel_engine_cs *engine = dev_priv->engine[i];
+
+		if (!ee->batchbuffer)
+			continue;
+
+		i915_aub_register(aub, RING_MODE_GEN7(engine),
+				  _MASKED_BIT_ENABLE(ee->vm_info.gfx_mode));
+		i915_aub_register(aub, RING_HWS_PGA(engine->mmio_base),
+				  ee->hws);
+	}
+
+	i915_aub_comment(aub, "PPGTT PML4/PDP/PD");
+	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
+		const struct drm_i915_error_pagemap_lvl *pml4 =
+			&error->ppgtt_pml4[i];
+		int l3, l2;
+
+		if (!error->active_vm[i])
+			break;
+
+		if (pml4->storage)
+			i915_aub_gtt(aub, PPGTT_LEVEL4, pml4->paddr,
+				     pml4->storage, GEN8_PML4ES_PER_PML4);
+
+		for (l3 = 0; l3 < pml4->nxt_lvl_count; l3++) {
+			const struct drm_i915_error_pagemap_lvl *pdp =
+				&pml4->nxt_lvl[l3];
+
+			if (pdp->storage)
+				i915_aub_gtt(aub, PPGTT_LEVEL3, pdp->paddr,
+					     pdp->storage, GEN8_4LVL_PDPES);
+
+			for (l2 = 0; l2 < pdp->nxt_lvl_count; l2++) {
+				const struct drm_i915_error_pagemap_lvl *pd =
+					&pdp->nxt_lvl[l2];
+
+				i915_aub_gtt(aub, PPGTT_LEVEL2, pd->paddr,
+					     pd->storage, I915_PDES);
+			}
+		}
+	}
+
+	/* Active request */
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+		struct intel_engine_cs *engine = dev_priv->engine[i];
+		int j;
+
+		if (!ee->batchbuffer)
+			continue;
+
+		i915_aub_comment(aub, "Engine %s", engine->name);
+
+		if (ee->hws_page) {
+			AUB_COMMENT_ERROR_OBJ("Hardware Status Page",
+					      ee->hws_page);
+			i915_aub_buffer(aub, true, ee->hws_page->tiling,
+					ee->hws_page->pages,
+					ee->hws_page->page_count);
+		}
+
+		if (ee->ctx) {
+			u64 gtt_offset =
+				ee->ctx->gtt_offset + LRC_GUCSHR_SZ * PAGE_SIZE;
+			u64 gtt_size =
+				ee->ctx->gtt_size - LRC_GUCSHR_SZ * PAGE_SIZE;
+
+			i915_aub_comment(aub,
+					 "Logical Ring Context (%08x_%08x %8u)",
+					 upper_32_bits(gtt_offset),
+					 lower_32_bits(gtt_offset),
+					 gtt_size);
+			i915_aub_context(aub, engine->class,
+					 ee->ctx->pages + LRC_GUCSHR_SZ,
+					 ee->ctx->page_count - LRC_GUCSHR_SZ);
+		}
+
+		if (ee->renderstate) {
+			AUB_COMMENT_ERROR_OBJ("Renderstate", ee->renderstate);
+			i915_aub_batchbuffer(aub, true, ee->renderstate->pages,
+					     ee->renderstate->page_count);
+		}
+
+		if (ee->wa_batchbuffer) {
+			AUB_COMMENT_ERROR_OBJ("Scratch", ee->wa_batchbuffer);
+			i915_aub_buffer(aub, true, I915_TILING_NONE,
+					ee->wa_batchbuffer->pages,
+					ee->wa_batchbuffer->page_count);
+		}
+
+		if (ee->wa_ctx) {
+			AUB_COMMENT_ERROR_OBJ("WA context", ee->wa_ctx);
+			i915_aub_batchbuffer(aub, true, ee->wa_ctx->pages,
+					     ee->wa_ctx->page_count);
+		}
+
+		if (ee->ringbuffer) {
+			AUB_COMMENT_ERROR_OBJ("Ringbuffer", ee->ringbuffer);
+			i915_aub_batchbuffer(aub, true, ee->ringbuffer->pages,
+					     ee->ringbuffer->page_count);
+		}
+
+		if (ee->batchbuffer) {
+			AUB_COMMENT_ERROR_OBJ("Batchbuffer", ee->batchbuffer);
+			i915_aub_batchbuffer(aub, false, ee->batchbuffer->pages,
+					     ee->batchbuffer->page_count);
+		}
+
+		for (j = 0; j < ee->user_bo_count; j++) {
+			struct drm_i915_error_object *obj = ee->user_bo[j];
+
+			AUB_COMMENT_ERROR_OBJ("BO", obj);
+			i915_aub_buffer(aub, false, obj->tiling,
+					obj->pages, obj->page_count);
+		}
+
+		/* XXX: Do I want to overwrite the head/tail inside the lrc? */
+		i915_aub_comment(aub, "ELSP submissions");
+		for (j = 0; j < ee->num_requests; j++)
+			i915_aub_elsp_submit(aub, engine,
+					     ee->requests[j].lrc_desc);
+	}
+
+	i915_aub_stop(aub);
+
+	if (m->bytes == 0 && m->err)
+		return m->err;
+
 	return 0;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx