Re: [PATCH 2/3] drm/i915/guc: Add work queue to trigger a GT reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 1/20/2022 20:31, Matthew Brost wrote:
The G2H handler needs to be flushed during a GT reset but a G2H
indicating engine reset failure can trigger a GT reset. Add a worker to
trigger the GT rest when an engine reset failure is received to break
this circular dependency.

v2:
  (John Harrison)
   - Store engine reset mask
   - Fix typo in commit message
v3:
  (John Harrison)
   - Fix another typo in commit message
   - s/reset_*/reset_fail_*/

Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
Reviewed-by: John Harrison <John.C.Harrison@xxxxxxxxx>

---
  drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  9 +++++
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 37 +++++++++++++++++--
  2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 9d26a86fe557a..d59bbf49d1c2b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -119,6 +119,15 @@ struct intel_guc {
  		 * function as it might be in an atomic context (no sleeping)
  		 */
  		struct work_struct destroyed_worker;
+		/**
+		 * @reset_fail_worker: worker to trigger a GT reset after an
+		 * engine reset fails
+		 */
+		struct work_struct reset_fail_worker;
+		/**
+		 * @reset_fail_mask: mask of engines that failed to reset
+		 */
+		intel_engine_mask_t reset_fail_mask;
  	} submission_state;
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 3918f1be114fa..9a3f503d201aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1731,6 +1731,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
  }
static void destroyed_worker_func(struct work_struct *w);
+static void reset_fail_worker_func(struct work_struct *w);
/*
   * Set up the memory resources to be shared with the GuC (via the GGTT)
@@ -1761,6 +1762,8 @@ int intel_guc_submission_init(struct intel_guc *guc)
  	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
  	INIT_WORK(&guc->submission_state.destroyed_worker,
  		  destroyed_worker_func);
+	INIT_WORK(&guc->submission_state.reset_fail_worker,
+		  reset_fail_worker_func);
guc->submission_state.guc_ids_bitmap =
  		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
@@ -4026,6 +4029,26 @@ guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
  	return gt->engine_class[engine_class][instance];
  }
+static void reset_fail_worker_func(struct work_struct *w)
+{
+	struct intel_guc *guc = container_of(w, struct intel_guc,
+					     submission_state.reset_fail_worker);
+	struct intel_gt *gt = guc_to_gt(guc);
+	intel_engine_mask_t reset_fail_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->submission_state.lock, flags);
+	reset_fail_mask = guc->submission_state.reset_fail_mask;
+	guc->submission_state.reset_fail_mask = 0;
+	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+	if (likely(reset_fail_mask))
+		intel_gt_handle_error(gt, reset_fail_mask,
+				      I915_ERROR_CAPTURE,
+				      "GuC failed to reset engine mask=0x%x\n",
+				      reset_fail_mask);
+}
+
  int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
  					 const u32 *msg, u32 len)
  {
@@ -4033,6 +4056,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
  	struct intel_gt *gt = guc_to_gt(guc);
  	u8 guc_class, instance;
  	u32 reason;
+	unsigned long flags;
if (unlikely(len != 3)) {
  		drm_err(&gt->i915->drm, "Invalid length %u", len);
@@ -4057,10 +4081,15 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
  	drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
  		guc_class, instance, engine->name, reason);
- intel_gt_handle_error(gt, engine->mask,
-			      I915_ERROR_CAPTURE,
-			      "GuC failed to reset %s (reason=0x%08x)\n",
-			      engine->name, reason);
+	spin_lock_irqsave(&guc->submission_state.lock, flags);
+	guc->submission_state.reset_fail_mask |= engine->mask;
+	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+	/*
+	 * A GT reset flushes this worker queue (G2H handler) so we must use
+	 * another worker to trigger a GT reset.
+	 */
+	queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
return 0;
  }




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux