Re: [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 8/14/2020 8:57 AM, Chris Wilson wrote:
Since we expect to inline the csb_parse() routines, the w/a for the
stale CSB data on Tigerlake will be pulled into process_csb(), and so we
might as well simply reuse the logic for all, and so will hopefully
avoid any strange behaviour on Icelake that was not covered by our
previous w/a.

References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Cc: Bruce Chang <yu.bruce.chang@xxxxxxxxx>
---
  drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
  1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3b8161c6b601..c176a029f27b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
   *     bits 47-57: sw context id of the lrc the GT switched away from
   *     bits 58-63: sw counter of the lrc the GT switched away from
   */
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
  {
-	bool ctx_away_valid;
-	bool new_queue;
-	u64 entry;
-
-	/* XXX HSD */
-	entry = READ_ONCE(*csb);
-	if (unlikely(entry == -1)) {
-		preempt_disable();
-		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-			GEM_WARN_ON("50us CSB timeout");
-		preempt_enable();
-	}
-	WRITE_ONCE(*(u64 *)csb, -1);
-
-	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	new_queue =
-		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+	bool new_queue =
+		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
/*
  	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
  	 * would require some extra handling, but we don't support that.
  	 */
  	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
  		return true;
  	}
@@ -2533,19 +2519,47 @@ static inline bool gen12_csb_parse(const u64 *csb)
  	 * context switch on an unsuccessful wait instruction since we always
  	 * use polling mode.
  	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
  	return false;
  }
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static inline u64 csb_read(u64 * const csb)
  {
-	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+	u64 entry = READ_ONCE(*csb);
+
+	/*
+	 * Unfortunately, the GPU does not always serialise its write
+	 * of the CSB entries before its write of the CSB pointer, at least
+	 * from the perspective of the CPU, using what is known as a Global
+	 * Observation Point. We may read a new CSB tail pointer, but then
+	 * read the stale CSB entries, causing us to misinterpret the
+	 * context-switch events, and eventually declare the GPU hung.
+	 *
+	 * icl:HSDES#:1806554093
+	 * tgl:XXX?
FYI: A HSD was also filed recently: HSD# 22011248461
+	 */
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+
+	/* Consume this entry so that we can spot its future reuse. */
+	WRITE_ONCE(*csb, -1);
+
+	return entry;
  }
static void process_csb(struct intel_engine_cs *engine)
  {
  	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u64 * const buf = execlists->csb_status;
+	u64 * const buf = execlists->csb_status;
  	const u8 num_entries = execlists->csb_size;
  	u8 head, tail;
@@ -2603,6 +2617,7 @@ static void process_csb(struct intel_engine_cs *engine)
  	rmb();
  	do {
  		bool promote;
+		u64 csb;
if (++head == num_entries)
  			head = 0;
@@ -2625,15 +2640,14 @@ static void process_csb(struct intel_engine_cs *engine)
  		 * status notifier.
  		 */
+ csb = csb_read(buf + head);
  		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head,
-			     upper_32_bits(buf[head]),
-			     lower_32_bits(buf[head]));
+			     head, upper_32_bits(csb), lower_32_bits(csb));
Nice change! The original trace will actually read the CSB entry. So when the trace was enabled, our issue will go away since one extra read will fix our issue.
if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(buf + head);
+			promote = gen12_csb_parse(csb);
  		else
-			promote = gen8_csb_parse(buf + head);
+			promote = gen8_csb_parse(csb);
  		if (promote) {
  			struct i915_request * const *old = execlists->active;
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux