With this patch, we now have some parity between xe_devcoredump and the simple_error_capture. The only difference is that xe_devcoredump will only stash the 'first' hang, which is the one that we care most and should analyze first, while simple_error_capture will dump them all the kernel log. But this is just a start point to start building a useful and organized crash dump, using standard infrastructure. Later this will be changed to have output that can be parsed by tools and used for error replay. Also, it is important to highlight that the goal is not to replace the simple_error_capture which is still useful for some cases. But simple_error_capture should be protected under DEBUG and EXPERT flags, while the devcoredump has its own production config and will be useful for bug reporting and for error replay. Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> --- drivers/gpu/drm/xe/xe_devcoredump.c | 6 ++++++ drivers/gpu/drm/xe/xe_devcoredump_types.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 1ffd12646a99..9dbafd586fbd 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -16,6 +16,7 @@ #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" +#include "xe_vm.h" /** * DOC: Xe device coredump @@ -103,6 +104,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, for_each_hw_engine(hwe, e->gt, id) xe_hw_engine_snapshot_print(coredump->snapshot.hwe[id], &p); + drm_printf(&p, "\n**** VM ****\n"); + xe_vm_snapshot_print(coredump->snapshot.vm, &p); + mutex_unlock(&coredump->lock); return count - iter.remain; @@ -124,6 +128,7 @@ static void xe_devcoredump_free(void *data) xe_guc_engine_snapshot_free(coredump->snapshot.ge); for_each_hw_engine(hwe, coredump->faulty_engine->gt, id) xe_hw_engine_snapshot_free(coredump->snapshot.hwe[id]); + xe_vm_snapshot_free(coredump->snapshot.vm); coredump->faulty_engine = NULL; drm_info(&coredump_to_xe(coredump)->drm, @@ -172,6 +177,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump) coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); } + coredump->snapshot.vm = xe_vm_snapshot_capture(e->vm, e->gt->info.id); xe_force_wake_put(gt_to_fw(e->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 8b17ecf1b6e6..f508eca292f7 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -31,8 +31,11 @@ struct xe_devcoredump_snapshot { struct xe_guc_ct_snapshot *ct; /** @ge: Guc Engine snapshot */ struct xe_guc_submit_engine_snapshot *ge; + /** @hwe: HW Engine snapshot array */ struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; + /** @vm: VM snapshot */ + struct xe_vm_snapshot *vm; }; /** -- 2.39.2