Use all the information previously recorded in the GPU error state to write
an AUB file.

TODO: output an already compressed file?

Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/Makefile        |   2 +-
 drivers/gpu/drm/i915/i915_aubcrash.c | 196 +++++++++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 04956c7..f958291 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -124,7 +124,7 @@ i915-y += dvo_ch7017.o \
 
 # Post-mortem debug and GPU hang state capture
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
-i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubcrash.o
+i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubmemtrace.o i915_aubcrash.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o
diff --git a/drivers/gpu/drm/i915/i915_aubcrash.c b/drivers/gpu/drm/i915/i915_aubcrash.c
index ebdbf09..a336777 100644
--- a/drivers/gpu/drm/i915/i915_aubcrash.c
+++ b/drivers/gpu/drm/i915/i915_aubcrash.c
@@ -27,6 +27,7 @@
 
 #include "intel_drv.h"
 #include "i915_aubcrash.h"
+#include "i915_aubmemtrace.h"
 
 /**
  * DOC: AubCrash
@@ -255,8 +256,203 @@ void i915_error_page_walk(struct i915_address_space *vm,
 	}
 }
 
+/*
+ * Output callback passed to i915_aub_start(): forwards @len bytes of @data to
+ * the error state buffer given in @priv.
+ */
+#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
+
+void write_aub(void *priv, const void *data, size_t len)
+{
+	struct drm_i915_error_state_buf *e = priv;
+
+	/* TODO: Compress the AUB file on the go */
+	i915_error_binary_write(e, data, len);
+}
+
+#else
+
+void write_aub(void *priv, const void *data, size_t len)
+{
+	struct drm_i915_error_state_buf *e = priv;
+
+	i915_error_binary_write(e, data, len);
+}
+
+#endif
+
+/* Emit an AUB comment with an object's GTT offset/size; needs a local 'aub' */
+#define AUB_COMMENT_ERROR_OBJ(name, obj) do { \
+	i915_aub_comment(aub, name " (%08x_%08x %8llu)", \
+			 upper_32_bits((obj)->gtt_offset), \
+			 lower_32_bits((obj)->gtt_offset), \
+			 (unsigned long long)(obj)->gtt_size); \
+} while (0)
+
+/**
+ * i915_error_state_to_aub - write a captured GPU error state as an AUB file
+ * @m: error state buffer that receives the AUB output
+ * @error: captured GPU state, or NULL if none was collected
+ *
+ * Emits the recorded registers, the PPGTT page tables of each active VM and,
+ * for every engine with a captured batchbuffer, its objects and ELSP
+ * submissions.
+ *
+ * Returns: the error from i915_aub_start() if the stream cannot be started,
+ * @m->err if nothing was written and an error was recorded, 0 otherwise.
+ */
 int i915_error_state_to_aub(struct drm_i915_error_state_buf *m,
 			    const struct i915_gpu_state *error)
 {
+	struct drm_i915_private *dev_priv = m->i915;
+	struct intel_aub *aub;
+	int i;
+
+	aub = i915_aub_start(dev_priv, write_aub, (void *)m, "AubCrash", true);
+	if (IS_ERR(aub))
+		return PTR_ERR(aub);
+
+	if (!error) {
+		i915_aub_comment(aub, "No error state collected\n");
+		goto out;
+	}
+
+	i915_aub_comment(aub, "Registers");
+	i915_aub_register(aub, GAM_ECOCHK, error->gam_ecochk);
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+		struct intel_engine_cs *engine = dev_priv->engine[i];
+
+		if (!ee->batchbuffer)
+			continue;
+
+		i915_aub_register(aub, RING_MODE_GEN7(engine),
+				  _MASKED_BIT_ENABLE(ee->vm_info.gfx_mode));
+		i915_aub_register(aub, RING_HWS_PGA(engine->mmio_base),
+				  ee->hws);
+	}
+
+	i915_aub_comment(aub, "PPGTT PML4/PDP/PD");
+	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
+		const struct drm_i915_error_pagemap_lvl *pml4 =
+			&error->ppgtt_pml4[i];
+		int l3, l2;
+
+		if (!error->active_vm[i])
+			break;
+
+		if (pml4->storage)
+			i915_aub_gtt(aub, PPGTT_LEVEL4, pml4->paddr,
+				     pml4->storage, GEN8_PML4ES_PER_PML4);
+
+		for (l3 = 0; l3 < pml4->nxt_lvl_count; l3++) {
+			const struct drm_i915_error_pagemap_lvl *pdp =
+				&pml4->nxt_lvl[l3];
+
+			if (pdp->storage)
+				i915_aub_gtt(aub, PPGTT_LEVEL3, pdp->paddr,
+					     pdp->storage, GEN8_4LVL_PDPES);
+
+			for (l2 = 0; l2 < pdp->nxt_lvl_count; l2++) {
+				const struct drm_i915_error_pagemap_lvl *pd =
+					&pdp->nxt_lvl[l2];
+
+				if (pd->storage)
+					i915_aub_gtt(aub, PPGTT_LEVEL2,
+						     pd->paddr, pd->storage,
+						     I915_PDES);
+			}
+		}
+	}
+
+	/* Active request */
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+		struct intel_engine_cs *engine = dev_priv->engine[i];
+		int j;
+
+		if (!ee->batchbuffer)
+			continue;
+
+		i915_aub_comment(aub, "Engine %s", engine->name);
+
+		if (ee->hws_page) {
+			AUB_COMMENT_ERROR_OBJ("Hardware Status Page",
+					      ee->hws_page);
+			i915_aub_buffer(aub, true, ee->hws_page->tiling,
+					ee->hws_page->pages,
+					ee->hws_page->page_count);
+		}
+
+		if (ee->ctx) {
+			/* Skip the leading LRC_GUCSHR_SZ pages */
+			u64 gtt_offset =
+				ee->ctx->gtt_offset + LRC_GUCSHR_SZ * PAGE_SIZE;
+			u64 gtt_size =
+				ee->ctx->gtt_size - LRC_GUCSHR_SZ * PAGE_SIZE;
+
+			i915_aub_comment(aub,
+				"Logical Ring Context (%08x_%08x %8llu)",
+				upper_32_bits(gtt_offset),
+				lower_32_bits(gtt_offset),
+				gtt_size);
+			i915_aub_context(aub, engine->class,
+					 ee->ctx->pages + LRC_GUCSHR_SZ,
+					 ee->ctx->page_count - LRC_GUCSHR_SZ);
+		}
+
+		if (ee->renderstate) {
+			AUB_COMMENT_ERROR_OBJ("Renderstate", ee->renderstate);
+			i915_aub_batchbuffer(aub, true, ee->renderstate->pages,
+					     ee->renderstate->page_count);
+		}
+
+		if (ee->wa_batchbuffer) {
+			AUB_COMMENT_ERROR_OBJ("Scratch", ee->wa_batchbuffer);
+			i915_aub_buffer(aub, true, I915_TILING_NONE,
+					ee->wa_batchbuffer->pages,
+					ee->wa_batchbuffer->page_count);
+		}
+
+		if (ee->wa_ctx) {
+			AUB_COMMENT_ERROR_OBJ("WA context", ee->wa_ctx);
+			i915_aub_batchbuffer(aub, true, ee->wa_ctx->pages,
+					     ee->wa_ctx->page_count);
+		}
+
+		if (ee->ringbuffer) {
+			AUB_COMMENT_ERROR_OBJ("Ringbuffer", ee->ringbuffer);
+			i915_aub_batchbuffer(aub, true, ee->ringbuffer->pages,
+					     ee->ringbuffer->page_count);
+		}
+
+		if (ee->batchbuffer) {
+			AUB_COMMENT_ERROR_OBJ("Batchbuffer", ee->batchbuffer);
+			i915_aub_batchbuffer(aub, false, ee->batchbuffer->pages,
+					     ee->batchbuffer->page_count);
+		}
+
+		for (j = 0; j < ee->user_bo_count; j++) {
+			struct drm_i915_error_object *obj = ee->user_bo[j];
+
+			AUB_COMMENT_ERROR_OBJ("BO", obj);
+			i915_aub_buffer(aub, false, obj->tiling,
+					obj->pages, obj->page_count);
+		}
+
+		/* XXX: Do I want to overwrite the head/tail inside the lrc? */
+		i915_aub_comment(aub, "ELSP submissions");
+		for (j = 0; j < ee->num_requests; j++)
+			i915_aub_elsp_submit(aub, engine,
+					     ee->requests[j].lrc_desc);
+	}
+
+out:
+	/* Pair the successful i915_aub_start() with i915_aub_stop() */
+	i915_aub_stop(aub);
+
+	if (m->bytes == 0 && m->err)
+		return m->err;
+
 	return 0;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx