[PATCH] [v4] drm/i915: Capture current context on error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On error, this represents the state of the currently running context at
the time it was loaded.

Unfortunately, since we're hung and can't switch out the context this
may not tell us too much about the most current state of the context,
but does give clues about what has happened since loading.

Thanks to recent doc updates, we have a little more confidence regarding
what is actually in this memory, and perhaps it will help us gain more
insight into certain bugs. AFAICT, the most interesting info is in the
first page. To save space, we only capture the first page. In the
future, we might want to dump more.

Sample of the relevant part of error state:
render ring --- HW Context = 0x01b20000
[0000] 00000000 1100105f 00002028 ffff0880
[0010] 0000209c feff4040 000020c0 efdf0080
[0020] 00002178 00000001 0000217c 00145855
[0030] 00002310 00000000 00002314 00000000

v2: Move error collection to the ring error code
Change format of dump to not confuse intel_error_decode (Chris)
Put the context error object with the others (Chris)
Don't search bound_list instead of active_list (chris)

v3: extract and flatten context recording (daniel)
checkpatch related fixes for the copypasta in debugfs

v4: bug in v3 (Daniel)
-       if ((ring->id == RCS) && error->ccid)
+       if ((ring->id != RCS) || !error->ccid)

References: https://bugs.freedesktop.org/show_bug.cgi?id=55845
Reviewed-by (v2): Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h     |  2 +-
 drivers/gpu/drm/i915/i915_irq.c     | 23 +++++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c65ab8..c92ae7f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -772,6 +772,23 @@ static int i915_error_state(struct seq_file *m, void *unused)
 				}
 			}
 		}
+
+		obj = error->ring[i].ctx;
+		if (obj) {
+			seq_printf(m, "%s --- HW Context = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   obj->gtt_offset);
+			offset = 0;
+			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
+				seq_printf(m, "[%04x] %08x %08x %08x %08x\n",
+					   offset,
+					   obj->pages[0][elt],
+					   obj->pages[0][elt+1],
+					   obj->pages[0][elt+2],
+					   obj->pages[0][elt+3]);
+					offset += 16;
+			}
+		}
 	}
 
 	if (error->overlay)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e95337c..bedabcd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -243,7 +243,7 @@ struct drm_i915_error_state {
 			int page_count;
 			u32 gtt_offset;
 			u32 *pages[0];
-		} *ringbuffer, *batchbuffer;
+		} *ringbuffer, *batchbuffer, *ctx;
 		struct drm_i915_error_request {
 			long jiffies;
 			u32 seqno;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6a328b8..dd95e82 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1238,6 +1238,26 @@ static void i915_record_ring_state(struct drm_device *dev,
 	error->cpu_ring_tail[ring->id] = ring->tail;
 }
 
+
+static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
+					   struct drm_i915_error_state *error,
+					   struct drm_i915_error_ring *ering)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_gem_object *obj;
+
+	/* Currently render ring is the only HW context user */
+	if ((ring->id != RCS) || !error->ccid)
+		return;
+
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
+		if ((error->ccid & PAGE_MASK) == obj->gtt_offset) {
+			ering->ctx = i915_error_object_create_sized(dev_priv,
+								    obj, 1);
+		}
+	}
+}
+
 static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
@@ -1255,6 +1275,9 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].ringbuffer =
 			i915_error_object_create(dev_priv, ring->obj);
 
+
+		i915_gem_record_active_context(ring, error, &error->ring[i]);
+
 		count = 0;
 		list_for_each_entry(request, &ring->request_list, list)
 			count++;
-- 
1.8.1.5



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux