Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx>
Acked-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gpu_error.c | 74 ++++++++++++++++++---------
1 file changed, 50 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b20bd6365615b..225f1b11a6b93 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1370,14 +1370,14 @@ static void engine_record_execlists(struct intel_engine_coredump *ee)
}
static bool record_context(struct i915_gem_context_coredump *e,
- const struct i915_request *rq)
+ struct intel_context *ce)
{
struct i915_gem_context *ctx;
struct task_struct *task;
bool simulated;
rcu_read_lock();
- ctx = rcu_dereference(rq->context->gem_context);
+ ctx = rcu_dereference(ce->gem_context);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
@@ -1396,8 +1396,8 @@ static bool record_context(struct i915_gem_context_coredump *e,
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
- e->total_runtime = intel_context_get_total_runtime_ns(rq->context);
- e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context);
+ e->total_runtime = intel_context_get_total_runtime_ns(ce);
+ e->avg_runtime = intel_context_get_avg_runtime_ns(ce);
simulated = i915_gem_context_no_error_capture(ctx);
@@ -1532,15 +1532,37 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_
return ee;
}
+static struct intel_engine_capture_vma *
+engine_coredump_add_context(struct intel_engine_coredump *ee,
+ struct intel_context *ce,
+ gfp_t gfp)
+{
+ struct intel_engine_capture_vma *vma = NULL;
+
+ ee->simulated |= record_context(&ee->context, ce);
+ if (ee->simulated)
+ return NULL;
+
+ /*
+ * We need to copy the ring and the HW context state to an
+ * anonymous buffer, as the simplest method to avoid them
+ * being overwritten by userspace.
+ */
+ vma = capture_vma(vma, ce->ring->vma, "ring", gfp);
+ vma = capture_vma(vma, ce->state, "HW context", gfp);
+
+ return vma;
+}
+
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
struct i915_request *rq,
gfp_t gfp)
{
- struct intel_engine_capture_vma *vma = NULL;
+ struct intel_engine_capture_vma *vma;
- ee->simulated |= record_context(&ee->context, rq);
- if (ee->simulated)
+ vma = engine_coredump_add_context(ee, rq->context, gfp);
+ if (!vma)
return NULL;
/*
@@ -1550,8 +1572,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
*/
vma = capture_vma_snapshot(vma, rq->batch_res, gfp, "batch");
vma = capture_user(vma, rq, gfp);
- vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
- vma = capture_vma(vma, rq->context->state, "HW context", gfp);
ee->rq_head = rq->head;
ee->rq_post = rq->postfix;
@@ -1604,25 +1624,31 @@ capture_engine(struct intel_engine_cs *engine,
return NULL;
intel_engine_get_hung_entity(engine, &ce, &rq);
- if (!rq || !i915_request_started(rq))
- goto no_request_capture;
+ if (rq && !i915_request_started(rq)) {
+ drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n",
+ engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id);
+ i915_request_put(rq);
+ rq = NULL;
+ }
- capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
- if (!capture)
- goto no_request_capture;
- if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
- intel_guc_capture_get_matching_node(engine->gt, ee, ce);
+ if (rq) {
+ capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
+ i915_request_put(rq);
+ } else if (ce) {
+ capture = engine_coredump_add_context(ee, ce, ATOMIC_MAYFAIL);
+ }
- intel_engine_coredump_add_vma(ee, capture, compress);
- i915_request_put(rq);
+ if (capture) {
+ intel_engine_coredump_add_vma(ee, capture, compress);
- return ee;
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ intel_guc_capture_get_matching_node(engine->gt, ee, ce);
+ } else {
+ kfree(ee);
+ ee = NULL;
+ }
-no_request_capture:
- if (rq)
- i915_request_put(rq);
- kfree(ee);
- return NULL;
+ return ee;
}
static void