From: Fernando Pacheco <fernando.pacheco@xxxxxxxxx> Correctable and uncorrectable Shared Local Memory (SLM) ECC errors will be counted in two different Thread Dispatch Logic (TDL) registers. GuC will receive a message from TDL when the first correctable/uncorrectable error is detected by SLM (first after a reset or register clear). This message is then forwarded to the appropriate severity register. Correctable errors will route to the kernel driver and uncorrectable errors are expected to route as a PCIe error, although the option exists to route both as interrupts. Service the interrupt and read the TDL registers for the error count. Cc: Paulo Zanoni <paulo.r.zanoni@xxxxxxxxx> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Cc: Fernando Pacheco <fernando.pacheco@xxxxxxxxx> Cc: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> Signed-off-by: Fernando Pacheco <fernando.pacheco@xxxxxxxxx> Signed-off-by: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_irq.c | 10 +++++++++- drivers/gpu/drm/i915/i915_reg.h | 7 +++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 17e679b910da..ca35edef492d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2536,7 +2536,7 @@ gen12_gt_hw_error_handler(struct drm_i915_private * const i915, { void __iomem * const regs = i915->uncore.regs; const char *hw_err_str = hardware_error_type_to_str(hw_err); - u32 other_errors = ~(EU_GRF_ERROR | EU_IC_ERROR); + u32 other_errors = ~(EU_GRF_ERROR | EU_IC_ERROR | SLM_ERROR); u32 errstat; lockdep_assert_held(&i915->irq_lock); @@ -2565,6 +2565,14 @@ gen12_gt_hw_error_handler(struct drm_i915_private * const i915, if (errstat & EU_IC_ERROR) DRM_ERROR("detected EU IC %s hardware error\n", hw_err_str); + if (errstat & SLM_ERROR) { + struct drm_i915_private *dev_priv = i915; + + DRM_ERROR("detected %u SLM %s hardware error(s)\n", + 
I915_READ(SLM_ECC_ERROR_CNTR(hw_err)), + hw_err_str); + } + /* * TODO: The remaining GT errors don't have a * need for targeted logging at the moment. We diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 40cb361b4254..b9c142f86611 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7765,6 +7765,13 @@ enum hardware_error { _ERR_STAT_GT_NONFATAL)) #define EU_GRF_ERROR (1 << 15) #define EU_IC_ERROR (1 << 14) +#define SLM_ERROR (1 << 13) + +#define _SLM_ECC_ERROR_CNT 0xe7f4 +#define _SLM_UNCORR_ECC_ERROR_CNT 0xe7c0 +#define SLM_ECC_ERROR_CNTR(x) _MMIO((x) == HARDWARE_ERROR_CORRECTABLE ? \ + _SLM_ECC_ERROR_CNT : \ + _SLM_UNCORR_ECC_ERROR_CNT) #define GEN11_RENDER_COPY_INTR_ENABLE _MMIO(0x190030) #define GEN11_VCS_VECS_INTR_ENABLE _MMIO(0x190034) -- 2.26.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx