In case of catastrophic errors the GuC sends a notification, which results in a cryptic message. Let's add a handler which, for starters, dumps the state of the affected engine. Signed-off-by: Andrzej Hajda <andrzej.hajda@xxxxxxxxx> --- .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 3 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 31 +++++++++++++++++++ 4 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index f359bef046e0b2..f9a1c5642855e3 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -138,6 +138,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + INTEL_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 804133df1ac9b4..61b412732d095a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -445,6 +445,8 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_error_capture_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_cat_error_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); struct intel_engine_cs * intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 2b22065e87bf9a..f55f724e264407 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ 
-1035,6 +1035,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		CT_ERROR(ct, "Received GuC exception notification!\n");
 		ret = 0;
 		break;
+	case INTEL_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
+		ret = intel_guc_cat_error_process_msg(guc, payload, len);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 693b07a977893d..94f91dfa3ec456 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4659,6 +4659,37 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 	return 0;
 }
 
+/* Dump the engine of context msg[0] on a GuC CAT error; 0, or -EPROTO on a bad msg. */
+int intel_guc_cat_error_process_msg(struct intel_guc *guc,
+				    const u32 *msg, u32 len)
+{
+	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	struct drm_printer p;
+	unsigned long flags;
+
+	/* The payload must be exactly one dword: the context id. */
+	if (unlikely(len != 1)) {
+		drm_dbg(&i915->drm, "Invalid length %u\n", len);
+		return -EPROTO;
+	}
+
+	/* Read ce->engine under the lookup lock; ce is not touched after unlock. */
+	xa_lock_irqsave(&guc->context_lookup, flags);
+	ce = g2h_context_lookup(guc, msg[0]);
+	engine = ce ? ce->engine : NULL;
+	xa_unlock_irqrestore(&guc->context_lookup, flags);
+	if (unlikely(!engine))
+		return -EPROTO;
+
+	drm_err(&i915->drm, "%s: CAT error reported by GuC\n", engine->name);
+	p = drm_info_printer(i915->drm.dev);
+	intel_engine_dump(engine, &p, "%s\n", engine->name);
+
+	return 0;
+}
+
 void intel_guc_find_hung_context(struct intel_engine_cs *engine)
 {
 	struct intel_guc *guc = &engine->gt->uc.guc;
-- 
2.34.1