[PATCH] drm/i915: Lock the engine while dumping the active request

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We cannot let the request be retired and freed while we are trying to
dump it during error capture. It is not sufficient just to grab a
reference to the request, as during retirement we may free the ring
which we are also dumping. So take the engine lock to prevent retiring
and freeing of the request.

Reported-by: Alex Shumsky <alexthreed@xxxxxxxxx>
Fixes: 83c317832eb1 ("drm/i915: Dump the ringbuffer of the active request for debugging")
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Alex Shumsky <alexthreed@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 51 ++++++++++++-----------
 drivers/gpu/drm/i915/i915_gpu_error.c     |  6 ++-
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index bdf279fa3b2e..8ee2dd423674 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1444,9 +1444,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void print_request_ring(struct drm_printer *m,
+			       const struct i915_request *rq,
+			       const struct intel_ring *ring)
 {
-	void *ring;
+	void *buf;
 	int size;
 
 	drm_printf(m,
@@ -1457,23 +1459,23 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
 
 	size = rq->tail - rq->head;
 	if (rq->tail < rq->head)
-		size += rq->ring->size;
+		size += ring->size;
 
-	ring = kmalloc(size, GFP_ATOMIC);
-	if (ring) {
-		const void *vaddr = rq->ring->vaddr;
+	buf = kmalloc(size, GFP_ATOMIC);
+	if (buf) {
+		const void *vaddr = ring->vaddr;
 		unsigned int head = rq->head;
 		unsigned int len = 0;
 
 		if (rq->tail < head) {
-			len = rq->ring->size - head;
-			memcpy(ring, vaddr + head, len);
+			len = ring->size - head;
+			memcpy(buf, vaddr + head, len);
 			head = 0;
 		}
-		memcpy(ring + len, vaddr + head, size - len);
+		memcpy(buf + len, vaddr + head, size - len);
 
-		hexdump(m, ring, size);
-		kfree(ring);
+		hexdump(m, buf, size);
+		kfree(buf);
 	}
 }
 
@@ -1484,6 +1486,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq;
 	intel_wakeref_t wakeref;
+	unsigned long flags;
 
 	if (header) {
 		va_list ap;
@@ -1503,31 +1506,31 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
 
-	rcu_read_lock();
-
 	drm_printf(m, "\tRequests:\n");
 
+	spin_lock_irqsave(&engine->active.lock, flags);
 	rq = intel_engine_find_active_request(engine);
 	if (rq) {
+		struct intel_ring *ring = rq->ring;
+
 		print_request(m, rq, "\t\tactive ");
 
 		drm_printf(m, "\t\tring->start:  0x%08x\n",
-			   i915_ggtt_offset(rq->ring->vma));
+			   i915_ggtt_offset(ring->vma));
 		drm_printf(m, "\t\tring->head:   0x%08x\n",
-			   rq->ring->head);
+			   ring->head);
 		drm_printf(m, "\t\tring->tail:   0x%08x\n",
-			   rq->ring->tail);
+			   ring->tail);
 		drm_printf(m, "\t\tring->emit:   0x%08x\n",
-			   rq->ring->emit);
+			   ring->emit);
 		drm_printf(m, "\t\tring->space:  0x%08x\n",
-			   rq->ring->space);
+			   ring->space);
 		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
-			   rq->timeline->hwsp_offset);
+			   ring->timeline->hwsp_offset);
 
-		print_request_ring(m, rq);
+		print_request_ring(m, rq, ring);
 	}
-
-	rcu_read_unlock();
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
 	if (wakeref) {
@@ -1689,7 +1692,6 @@ struct i915_request *
 intel_engine_find_active_request(struct intel_engine_cs *engine)
 {
 	struct i915_request *request, *active = NULL;
-	unsigned long flags;
 
 	/*
 	 * We are called by the error capture, reset and to dump engine
@@ -1702,7 +1704,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * At all other times, we must assume the GPU is still running, but
 	 * we only care about the snapshot of this moment.
 	 */
-	spin_lock_irqsave(&engine->active.lock, flags);
+	lockdep_assert_held(&engine->active.lock);
 	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (i915_request_completed(request))
 			continue;
@@ -1717,7 +1719,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 		active = request;
 		break;
 	}
-	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return active;
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5489cd879315..f297a43df1e9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1411,6 +1411,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 		struct intel_engine_cs *engine = i915->engine[i];
 		struct drm_i915_error_engine *ee = &error->engine[i];
 		struct i915_request *request;
+		unsigned long flags;
 
 		ee->engine_id = -1;
 
@@ -1422,10 +1423,11 @@ static void gem_record_rings(struct i915_gpu_state *error)
 		error_record_engine_registers(error, engine, ee);
 		error_record_engine_execlists(engine, ee);
 
+		spin_lock_irqsave(&engine->active.lock, flags);
 		request = intel_engine_find_active_request(engine);
 		if (request) {
 			struct i915_gem_context *ctx = request->gem_context;
-			struct intel_ring *ring;
+			struct intel_ring *ring = request->ring;
 
 			ee->vm = ctx->vm ?: &engine->gt->ggtt->vm;
 
@@ -1455,7 +1457,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			ee->rq_post = request->postfix;
 			ee->rq_tail = request->tail;
 
-			ring = request->ring;
 			ee->cpu_ring_head = ring->head;
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =
@@ -1463,6 +1464,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 			engine_record_requests(engine, request, ee);
 		}
+		spin_unlock_irqrestore(&engine->active.lock, flags);
 
 		ee->hws_page =
 			i915_error_object_create(i915,
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux