Trying to enable printk debugging for GEM is fraught with the issue of spam; interactions with HW are very frequent and often boring. However, one instance where they are not so boring is just before a BUG; here ftrace provides a facility to dump its ringbuffer on an oops. So for CI let's enable trace_printk() to capture the last exchanges with HW as a death rattle. For example, [ 72.120722] ------------[ cut here ]------------ [ 72.120748] kernel BUG at drivers/gpu/drm/i915/intel_lrc.c:905! [ 72.120756] invalid opcode: 0000 [#1] SMP [ 72.120762] Dumping ftrace buffer: [ 72.120768] --------------------------------- ... [ 72.200424] gem_conc-1064 0..s1 71949306us : intel_lrc_irq_handler: bcs0 in: ctx=4.1, seqno=161 [ 72.200469] gem_conc-1064 0..s1 71949312us : intel_lrc_irq_handler: bcs0 in: ctx=6.2, seqno=160 [ 72.200512] gem_conc-1066 1..s1 71949325us : intel_lrc_irq_handler: bcs0 csb[4/4]: status=0x00008002 [ 72.200555] gem_conc-1066 1..s1 71949326us : intel_lrc_irq_handler: bcs0 out: ctx=6, seqno=160 [ 72.200601] gem_conc-1066 1..s. 71956923us : intel_lrc_irq_handler: bcs0 csb[5/5]: status=0x00000014 [ 72.200646] gem_conc-1066 1..s. 71956928us : intel_lrc_irq_handler: bcs0 out: ctx=6, seqno=160 [ 72.200690] gem_conc-1066 1..s. 71956939us : intel_lrc_irq_handler: bcs0 in: ctx=8.1, seqno=164 [ 72.200736] gem_conc-1066 1..s. 71956940us : intel_lrc_irq_handler: bcs0 in: ctx=4.2, seqno=162 [ 72.200780] gem_conc-1066 1..s. 71956951us : intel_lrc_irq_handler: bcs0 csb[0/0]: status=0x00008002 [ 72.200824] gem_conc-1066 1..s. 71956951us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=162 [ 72.200867] <idle>-0 1..s1 71967999us : intel_lrc_irq_handler: bcs0 csb[1/1]: status=0x00000014 [ 72.200912] <idle>-0 1..s1 71968001us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=162 [ 72.200956] <idle>-0 1.Ns1 71979385us : intel_lrc_irq_handler: bcs0 csb[2/2]: status=0x00000018 [ 72.201001] <idle>-0 1.Ns1 71979388us : intel_lrc_irq_handler: bcs0 out: ctx=8, seqno=164 [ 72.201044] gem_conc-1063 3..s1 72086825us : intel_lrc_irq_handler: bcs0 in: ctx=7.1, seqno=165 [ 72.201088] gem_conc-1066 1..s. 72086918us : intel_lrc_irq_handler: bcs0 csb[3/3]: status=0x00000001 [ 72.201132] gem_conc-1066 1..s. 72086932us : intel_lrc_irq_handler: bcs0 in: ctx=7.2, seqno=166 [ 72.201176] gem_conc-1066 1..s. 72086941us : intel_lrc_irq_handler: bcs0 csb[4/4]: status=0x00008002 [ 72.201219] gem_conc-1066 1..s. 72086941us : intel_lrc_irq_handler: bcs0 out: ctx=7, seqno=166 [ 72.201263] gem_conc-1066 1..s. 72103855us : intel_lrc_irq_handler: bcs0 csb[5/5]: status=0x00000018 [ 72.201307] gem_conc-1066 1..s. 72103858us : intel_lrc_irq_handler: bcs0 out: ctx=7, seqno=166 [ 72.201351] gem_conc-1064 0..s1 72116924us : intel_lrc_irq_handler: bcs0 in: ctx=4.1, seqno=167 [ 72.201394] gem_conc-1064 0..s1 72116981us : intel_lrc_irq_handler: bcs0 in: ctx=4.2, seqno=168 [ 72.201438] gem_conc-1066 1..s1 72117545us : intel_lrc_irq_handler: bcs0 csb[0/2]: status=0x00000012 [ 72.201482] gem_conc-1066 1..s1 72117547us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=168 [ 72.201526] gem_conc-1066 1..s1 72117548us : intel_lrc_irq_handler: bcs0 csb[1/2]: status=0x00008002 [ 72.201570] gem_conc-1066 1..s1 72117548us : intel_lrc_irq_handler: bcs0 out: ctx=4, seqno=168 [ 72.201578] --------------------------------- v2: Tweak the formatting to be more consistent between in/out. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/Kconfig.debug | 14 ++++++++++++++ drivers/gpu/drm/i915/i915_gem.h | 6 ++++++ drivers/gpu/drm/i915/intel_lrc.c | 14 ++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 19c77c6feb24..9e53edbc713b 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -28,6 +28,7 @@ config DRM_I915_DEBUG select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST + select DRM_I915_TRACE_GEM default n help Choose this option to turn on extra driver debugging that may affect @@ -49,6 +50,19 @@ config DRM_I915_DEBUG_GEM If in doubt, say "N". +config DRM_I915_TRACE_GEM + bool "Insert extra ftrace output from the GEM internals" + select TRACING + default n + help + Enable additional and verbose debugging output that will spam + ordinary tests, but may be vital for post-mortem debugging when + used with /proc/sys/kernel/ftrace_dump_on_oops + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SW_FENCE_DEBUG_OBJECTS bool "Enable additional driver debugging for fence objects" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index ee54597465b6..d7905e4a1cf4 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -44,6 +44,12 @@ #define GEM_DEBUG_BUG_ON(expr) #endif +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) +#define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#else +#define GEM_TRACE(...) +#endif + #define I915_NUM_ENGINES 5 #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4f56ff5cb908..fb2a46f5a9c0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -467,6 +467,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) port_set(&port[n], port_pack(rq, count)); desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + engine->name, n, + rq->ctx->hw_id, + count, + rq->global_seqno); } else { GEM_BUG_ON(!n); desc = 0; @@ -521,6 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) ce->ring->tail &= (ce->ring->size - 1); ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + GEM_TRACE("\n"); for (n = execlists_num_ports(&engine->execlists); --n; ) elsp_write(0, elsp); @@ -860,6 +867,9 @@ static void intel_lrc_irq_handler(unsigned long data) */ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ + GEM_TRACE("%s csb[%d/%d]: status=0x%08x:0x%08x\n", + engine->name, head, tail, + status, buf[2*head + 1]); if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; @@ -887,6 +897,10 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); rq = port_unpack(port, &count); + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + engine->name, + rq->ctx->hw_id, count, + rq->global_seqno); GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); -- 2.15.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx