One minor NIT (though i hope it could be fixed otw in as it adds a bit of ease-of-log-readibility). That said, everything else looks good. Reviewed-by: Alan Previn <alan.previn.teres.alexis@xxxxxxxxx> On Wed, 2022-07-27 at 19:20 -0700, John.C.Harrison@xxxxxxxxx wrote: > From: John Harrison <John.C.Harrison@xxxxxxxxx> > > When debugging GuC communication issues, it is useful to have the CTB > info available. So add the state and buffer contents to the error > capture log. > > Also, add a sub-structure for the GuC specific error capture info as > it is now becoming numerous. > > Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gpu_error.c | 59 +++++++++++++++++++++++---- > drivers/gpu/drm/i915/i915_gpu_error.h | 20 +++++++-- > 2 files changed, 67 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index addba75252343..543ba63f958ea 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -671,6 +671,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, > pdev->subsystem_device); > } > > +static void err_print_guc_ctb(struct drm_i915_error_state_buf *m, > + const char *name, > + const struct intel_ctb_coredump *ctb) > +{ > + if (!ctb->size) > + return; > + > + err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n", > + name, ctb->raw_status, ctb->raw_head, ctb->raw_tail, > + ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size); > NIT: to make it more readible on first glance, would be nice to add more descriptive text like "raw: Sts:0x%08X, Hd:0x%08X,Tl:0x@08X..." also, the not sure why cmds_offset is presented with a "x size" as opposed to just "desc-off = foo1, cmd-off = foo2, size = foo3"? > +} > + > static void err_print_uc(struct drm_i915_error_state_buf *m, > const struct intel_uc_coredump *error_uc) > { > @@ -678,8 +690,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, > > intel_uc_fw_dump(&error_uc->guc_fw, &p); > intel_uc_fw_dump(&error_uc->huc_fw, &p); > - err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->timestamp); > - intel_gpu_error_print_vma(m, NULL, error_uc->guc_log); > + err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp); > + intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log); > + err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence); > + err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0); > + err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1); > + intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb); > } > > static void err_free_sgl(struct scatterlist *sgl) > @@ -854,7 +870,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, > if (error->gt) { > bool print_guc_capture = false; > > - if (error->gt->uc && error->gt->uc->is_guc_capture) > + if (error->gt->uc && error->gt->uc->guc.is_guc_capture) > print_guc_capture = true; > > err_print_gt_display(m, error->gt); > @@ -1009,7 +1025,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc) > { > kfree(uc->guc_fw.path); > kfree(uc->huc_fw.path); > - i915_vma_coredump_free(uc->guc_log); > + i915_vma_coredump_free(uc->guc.vma_log); > + i915_vma_coredump_free(uc->guc.vma_ctb); > > kfree(uc); > } > @@ -1658,6 +1675,23 @@ gt_record_engines(struct intel_gt_coredump *gt, > } > } > > +static void gt_record_guc_ctb(struct intel_ctb_coredump *saved, > + const struct intel_guc_ct_buffer *ctb, > + const void *blob_ptr, struct intel_guc *guc) > +{ > + if (!ctb || !ctb->desc) > + return; > + > + saved->raw_status = ctb->desc->status; > + saved->raw_head = ctb->desc->head; > + saved->raw_tail = ctb->desc->tail; > + saved->head = ctb->head; > + saved->tail = ctb->tail; > + saved->size = ctb->size; > + saved->desc_offset = ((void *)ctb->desc) - blob_ptr; > + saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr; > +} > + > static struct intel_uc_coredump * > gt_record_uc(struct intel_gt_coredump *gt, > struct i915_vma_compress *compress) > @@ -1684,9 +1718,16 @@ gt_record_uc(struct intel_gt_coredump *gt, > * log times to system times (in conjunction with the error->boottime and > * gt->clock_frequency fields saved elsewhere). > */ > - error_uc->timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP); > - error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, > - "GuC log buffer", compress); > + error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP); > + error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, > + "GuC log buffer", compress); > + error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma, > + "GuC CT buffer", compress); > + error_uc->guc.last_fence = uc->guc.ct.requests.last_fence; > + gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send, > + uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); > + gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv, > + uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); > > return error_uc; > } > @@ -2039,9 +2080,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du > error->gt->uc = gt_record_uc(error->gt, compress); > if (error->gt->uc) { > if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) > - error->gt->uc->is_guc_capture = true; > + error->gt->uc->guc.is_guc_capture = true; > else > - GEM_BUG_ON(error->gt->uc->is_guc_capture); > + GEM_BUG_ON(error->gt->uc->guc.is_guc_capture); > } > } > > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h > index d8a8b3d529e09..efc75cc2ffdb9 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.h > +++ b/drivers/gpu/drm/i915/i915_gpu_error.h > @@ -125,6 +125,15 @@ struct intel_engine_coredump { > struct intel_engine_coredump *next; > }; > > +struct intel_ctb_coredump { > + u32 raw_head, head; > + u32 raw_tail, tail; > + u32 raw_status; > + u32 desc_offset; > + u32 cmds_offset; > + u32 size; > +}; > + > struct intel_gt_coredump { > const struct intel_gt *_gt; > bool awake; > @@ -165,9 +174,14 @@ struct intel_gt_coredump { > struct intel_uc_coredump { > struct intel_uc_fw guc_fw; > struct intel_uc_fw huc_fw; > - struct i915_vma_coredump *guc_log; > - u32 timestamp; > - bool is_guc_capture; > + struct guc_info { > + struct intel_ctb_coredump ctb[2]; > + struct i915_vma_coredump *vma_ctb; > + struct i915_vma_coredump *vma_log; > + u32 timestamp; > + u16 last_fence; > + bool is_guc_capture; > + } guc; > } *uc; > > struct intel_gt_coredump *next; > -- > 2.37.1 >