On Wed, Apr 26, 2023 at 04:57:07PM -0400, Rodrigo Vivi wrote: > The goal is to allow for a snapshot capture to be taken at the time > of the crash, while the print out can happen at a later time through > the exposed devcoredump virtual device. > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > --- > drivers/gpu/drm/xe/xe_guc_submit.c | 212 +++++++++++++++++++---- > drivers/gpu/drm/xe/xe_guc_submit.h | 10 +- > drivers/gpu/drm/xe/xe_guc_submit_types.h | 91 ++++++++++ > 3 files changed, 274 insertions(+), 39 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c > index a5fe7755ce4c..9c06411f857f 100644 > --- a/drivers/gpu/drm/xe/xe_guc_submit.c > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c > @@ -1596,75 +1596,211 @@ int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) > return 0; > } > > -static void guc_engine_wq_print(struct xe_engine *e, struct drm_printer *p) > +static void > +guc_engine_wq_snapshot_capture(struct xe_engine *e, > + struct xe_guc_submit_engine_snapshot *snapshot) > { > struct xe_guc *guc = engine_to_guc(e); > struct xe_device *xe = guc_to_xe(guc); > struct iosys_map map = xe_lrc_parallel_map(e->lrc); > int i; > > + snapshot->guc.wqi_head = e->guc->wqi_head; > + snapshot->guc.wqi_tail = e->guc->wqi_tail; > + snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); > + snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); > + snapshot->parallel.wq_desc.status = parallel_read(xe, map, > + wq_desc.wq_status); > + > + if (snapshot->parallel.wq_desc.head != > + snapshot->parallel.wq_desc.tail) { > + for (i = snapshot->parallel.wq_desc.head; > + i != snapshot->parallel.wq_desc.tail; > + i = (i + sizeof(u32)) % WQ_SIZE) > + snapshot->parallel.wq[i / sizeof(u32)] = > + parallel_read(xe, map, wq[i / sizeof(u32)]); > + } > +} > + > +static void > +guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, > + struct drm_printer 
*p) > +{ > + int i; > + > drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", > - e->guc->wqi_head, parallel_read(xe, map, wq_desc.head)); > + snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); > drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", > - e->guc->wqi_tail, parallel_read(xe, map, wq_desc.tail)); > - drm_printf(p, "\tWQ status: %u\n", > - parallel_read(xe, map, wq_desc.wq_status)); > - if (parallel_read(xe, map, wq_desc.head) != > - parallel_read(xe, map, wq_desc.tail)) { > - for (i = parallel_read(xe, map, wq_desc.head); > - i != parallel_read(xe, map, wq_desc.tail); > + snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); > + drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); > + > + if (snapshot->parallel.wq_desc.head != > + snapshot->parallel.wq_desc.tail) { > + for (i = snapshot->parallel.wq_desc.head; > + i != snapshot->parallel.wq_desc.tail; > i = (i + sizeof(u32)) % WQ_SIZE) > drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), > - parallel_read(xe, map, wq[i / sizeof(u32)])); > + snapshot->parallel.wq[i / sizeof(u32)]); > } > } > > -static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) > +/** > + * xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine. > + * @e: Xe Engine. > + * > + * This can be printed out in a later stage like during dev_coredump > + * analysis. > + * > + * Returns: a GuC Submit Engine snapshot object that must be freed by the > + * caller, using `xe_guc_engine_snapshot_free`. 
> + */ > +struct xe_guc_submit_engine_snapshot * > +xe_guc_engine_snapshot_capture(struct xe_engine *e) > { > struct drm_gpu_scheduler *sched = &e->guc->sched; > struct xe_sched_job *job; > + struct xe_guc_submit_engine_snapshot *snapshot; > int i; > > - drm_printf(p, "\nGuC ID: %d\n", e->guc->id); > - drm_printf(p, "\tName: %s\n", e->name); > - drm_printf(p, "\tClass: %d\n", e->class); > - drm_printf(p, "\tLogical mask: 0x%x\n", e->logical_mask); > - drm_printf(p, "\tWidth: %d\n", e->width); > - drm_printf(p, "\tRef: %d\n", kref_read(&e->refcount)); > - drm_printf(p, "\tTimeout: %ld (ms)\n", sched->timeout); > - drm_printf(p, "\tTimeslice: %u (us)\n", e->sched_props.timeslice_us); > - drm_printf(p, "\tPreempt timeout: %u (us)\n", > - e->sched_props.preempt_timeout_us); > + snapshot = kzalloc(sizeof(struct xe_guc_submit_engine_snapshot), > + GFP_ATOMIC); Throughout this file, every allocation needs to be checked for failure. Also, if only a later allocation fails (say, just the last one), I wouldn't fail the entire capture; just stop capturing at that point and keep what has already been collected (i.e. some info is better than none). 
Matt > + > + snapshot->guc.id = e->guc->id; > + memcpy(&snapshot->name, &e->name, sizeof(snapshot->name)); > + snapshot->class = e->class; > + snapshot->logical_mask = e->logical_mask; > + snapshot->width = e->width; > + snapshot->refcount = kref_read(&e->refcount); > + snapshot->sched_timeout = sched->timeout; > + snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us; > + snapshot->sched_props.preempt_timeout_us = > + e->sched_props.preempt_timeout_us; > + > + snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot), > + GFP_ATOMIC); > + > for (i = 0; i < e->width; ++i ) { > struct xe_lrc *lrc = e->lrc + i; > > + snapshot->lrc[i].context_desc = > + lower_32_bits(xe_lrc_ggtt_addr(lrc)); > + snapshot->lrc[i].head = xe_lrc_ring_head(lrc); > + snapshot->lrc[i].tail.internal = lrc->ring.tail; > + snapshot->lrc[i].tail.memory = > + xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); > + snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); > + snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); > + > + } > + > + snapshot->schedule_state = atomic_read(&e->guc->state); > + snapshot->engine_flags = e->flags; > + > + snapshot->parallel_execution = xe_engine_is_parallel(e); > + if (snapshot->parallel_execution) > + guc_engine_wq_snapshot_capture(e, snapshot); > + > + spin_lock(&sched->job_list_lock); > + snapshot->pending_list_size = list_count_nodes(&sched->pending_list); > + snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, > + sizeof(struct pending_list_snapshot), > + GFP_ATOMIC); > + i = 0; > + list_for_each_entry(job, &sched->pending_list, drm.list) { > + snapshot->pending_list[i].seqno = xe_sched_job_seqno(job); > + snapshot->pending_list[i].fence = > + dma_fence_is_signaled(job->fence) ? 1 : 0; > + snapshot->pending_list[i].finished = > + dma_fence_is_signaled(&job->drm.s_fence->finished) > + ? 
1 : 0; > + i++; > + } > + spin_unlock(&sched->job_list_lock); > + > + return snapshot; > +} > + > +/** > + * xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot. > + * @snapshot: GuC Submit Engine snapshot object. > + * @p: drm_printer where it will be printed out. > + * > + * This function prints out a given GuC Submit Engine snapshot object. > + */ > +void > +xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, > + struct drm_printer *p) > +{ > + int i; > + > + drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); > + drm_printf(p, "\tName: %s\n", snapshot->name); > + drm_printf(p, "\tClass: %d\n", snapshot->class); > + drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); > + drm_printf(p, "\tWidth: %d\n", snapshot->width); > + drm_printf(p, "\tRef: %d\n", snapshot->refcount); > + drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); > + drm_printf(p, "\tTimeslice: %u (us)\n", > + snapshot->sched_props.timeslice_us); > + drm_printf(p, "\tPreempt timeout: %u (us)\n", > + snapshot->sched_props.preempt_timeout_us); > + > + for (i = 0; i < snapshot->width; ++i ) { > drm_printf(p, "\tHW Context Desc: 0x%08x\n", > - lower_32_bits(xe_lrc_ggtt_addr(lrc))); > + snapshot->lrc[i].context_desc); > drm_printf(p, "\tLRC Head: (memory) %u\n", > - xe_lrc_ring_head(lrc)); > + snapshot->lrc[i].head); > drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", > - lrc->ring.tail, > - xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL)); > + snapshot->lrc[i].tail.internal, > + snapshot->lrc[i].tail.memory); > drm_printf(p, "\tStart seqno: (memory) %d\n", > - xe_lrc_start_seqno(lrc)); > - drm_printf(p, "\tSeqno: (memory) %d\n", xe_lrc_seqno(lrc)); > + snapshot->lrc[i].start_seqno); > + drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); > } > - drm_printf(p, "\tSchedule State: 0x%x\n", atomic_read(&e->guc->state)); > - drm_printf(p, "\tFlags: 0x%lx\n", e->flags); > - if (xe_engine_is_parallel(e)) > - guc_engine_wq_print(e, 
p); > + drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); > + drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags); > > - spin_lock(&sched->job_list_lock); > + if (snapshot->parallel_execution) > + guc_engine_wq_snapshot_print(snapshot, p); > > - list_for_each_entry(job, &sched->pending_list, drm.list) > + for(i = 0; i < snapshot->pending_list_size; i++) > drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", > - xe_sched_job_seqno(job), > - dma_fence_is_signaled(job->fence) ? 1 : 0, > - dma_fence_is_signaled(&job->drm.s_fence->finished) ? > - 1 : 0); > - spin_unlock(&sched->job_list_lock); > + snapshot->pending_list[i].seqno, > + snapshot->pending_list[i].fence, > + snapshot->pending_list[i].finished); > +} > + > +/** > + * xe_guc_engine_snapshot_free - Free all allocated objects for a given > + * snapshot. > + * @snapshot: GuC Submit Engine snapshot object. > + * > + * This function free all the memory that needed to be allocated at capture > + * time. > + */ > +void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot) > +{ > + kfree(snapshot->lrc); > + kfree(snapshot->pending_list); > + kfree(snapshot); > +} > + > +static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) > +{ > + struct xe_guc_submit_engine_snapshot *snapshot; > + > + snapshot = xe_guc_engine_snapshot_capture(e); > + xe_guc_engine_snapshot_print(snapshot, p); > + xe_guc_engine_snapshot_free(snapshot); > } > > +/** > + * xe_guc_submit_print - GuC Submit Print. > + * @guc: GuC. > + * @p: drm_printer where it will be printed out. > + * > + * This function capture and prints snapshots of **all** GuC Engines. 
> + */ > void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) > { > struct xe_engine *e; > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h > index 8002734d6f24..4153c2d22013 100644 > --- a/drivers/gpu/drm/xe/xe_guc_submit.h > +++ b/drivers/gpu/drm/xe/xe_guc_submit.h > @@ -13,7 +13,6 @@ struct xe_engine; > struct xe_guc; > > int xe_guc_submit_init(struct xe_guc *guc); > -void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); > > int xe_guc_submit_reset_prepare(struct xe_guc *guc); > void xe_guc_submit_reset_wait(struct xe_guc *guc); > @@ -27,4 +26,13 @@ int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, > u32 len); > int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); > > +struct xe_guc_submit_engine_snapshot * > +xe_guc_engine_snapshot_capture(struct xe_engine *e); > +void > +xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot, > + struct drm_printer *p); > +void > +xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot); > +void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); > + > #endif > diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h > index d369ea0bad60..0b726609dc14 100644 > --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h > +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h > @@ -61,4 +61,95 @@ struct guc_submit_parallel_scratch { > u32 wq[WQ_SIZE / sizeof(u32)]; > }; > > +struct lrc_snapshot { > + u32 context_desc; > + u32 head; > + struct { > + u32 internal; > + u32 memory; > + } tail; > + u32 start_seqno; > + u32 seqno; > +}; > + > +struct pending_list_snapshot { > + u32 seqno; > + bool fence; > + bool finished; > +}; > + > +/** > + * struct xe_guc_submit_engine_snapshot - Snapshot for devcoredump > + */ > +struct xe_guc_submit_engine_snapshot { > + /** @name: name of this engine */ > + char name[MAX_FENCE_NAME_LEN]; > + /** @class: class 
of this engine */ > + enum xe_engine_class class; > + /** > + * @logical_mask: logical mask of where job submitted to engine can run > + */ > + u32 logical_mask; > + /** @width: width (number BB submitted per exec) of this engine */ > + u16 width; > + /** @refcount: ref count of this engine */ > + u32 refcount; > + /** > + * @sched_timeout: the time after which a job is removed from the > + * scheduler. > + */ > + long sched_timeout; > + > + /** @sched_props: scheduling properties */ > + struct { > + /** @timeslice_us: timeslice period in micro-seconds */ > + u32 timeslice_us; > + /** @preempt_timeout_us: preemption timeout in micro-seconds */ > + u32 preempt_timeout_us; > + } sched_props; > + > + /** @lrc: LRC Snapshot */ > + struct lrc_snapshot *lrc; > + > + /** @schedule_state: Schedule State at the moment of Crash */ > + u32 schedule_state; > + /** @engine_flags: Flags of the faulty engine */ > + unsigned long engine_flags; > + > + /** @guc: GuC Engine Snapshot */ > + struct { > + /** @wqi_head: work queue item head */ > + u32 wqi_head; > + /** @wqi_tail: work queue item tail */ > + u32 wqi_tail; > + /** @id: GuC id for this xe_engine */ > + u16 id; > + } guc; > + > + /** > + * @parallel_execution: Indication if the failure was during parallel > + * execution > + */ > + bool parallel_execution; > + /** @parallel: snapshot of the useful parallel scratch */ > + struct { > + /** @wq_desc: Workqueue description */ > + struct { > + /** @head: Workqueue Head */ > + u32 head; > + /** @tail: Workqueue Tail */ > + u32 tail; > + /** @status: Workqueue Status */ > + u32 status; > + } wq_desc; > + /** @wq: Workqueue Items */ > + u32 wq[WQ_SIZE / sizeof(u32)]; > + } parallel; > + > + /** @pending_list_size: Size of the pending list snapshot array */ > + int pending_list_size; > + /** @pending_list: snapshot of the pending list info */ > + struct pending_list_snapshot *pending_list; > +}; > + > #endif > -- > 2.39.2 >