Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts

John Harrison <john.c.harrison@xxxxxxxxx> · Wed, 15 Sep 2021 12:24:41 -0700

On 8/20/2021 15:44, Matthew Brost wrote:
In GuC parent-child contexts, the parent context controls the scheduling;
ensure that only the parent performs the scheduling operations.

Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 24 ++++++++++++++-----
  1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index dbcb9ab28a9a..00d54bb00bfb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -320,6 +320,12 @@ static void decr_context_committed_requests(struct intel_context *ce)
  	GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
  }
  
+static struct intel_context *
+request_to_scheduling_context(struct i915_request *rq)
+{
+	return intel_context_to_parent(rq->context);
+}
+
  static bool context_guc_id_invalid(struct intel_context *ce)
  {
  	return ce->guc_id.id == GUC_INVALID_LRC_ID;
@@ -1684,6 +1690,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
  
  	GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID);
  
+	GEM_BUG_ON(intel_context_is_child(ce));
  	trace_intel_context_sched_disable(ce);
  
  	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
@@ -1898,6 +1905,8 @@ static void guc_context_sched_disable(struct intel_context *ce)
  	u16 guc_id;
  	bool enabled;
  
+	GEM_BUG_ON(intel_context_is_child(ce));
+
  	if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
  	    !lrc_desc_registered(guc, ce->guc_id.id)) {
  		spin_lock_irqsave(&ce->guc_state.lock, flags);
@@ -2286,6 +2295,8 @@ static void guc_signal_context_fence(struct intel_context *ce)
  {
  	unsigned long flags;
  
+	GEM_BUG_ON(intel_context_is_child(ce));
+
  	spin_lock_irqsave(&ce->guc_state.lock, flags);
  	clr_context_wait_for_deregister_to_register(ce);
  	__guc_signal_context_fence(ce);
@@ -2315,7 +2326,7 @@ static void guc_context_init(struct intel_context *ce)
  
  static int guc_request_alloc(struct i915_request *rq)
  {
-	struct intel_context *ce = rq->context;
+	struct intel_context *ce = request_to_scheduling_context(rq);
  	struct intel_guc *guc = ce_to_guc(ce);
  	unsigned long flags;
  	int ret;
@@ -2358,11 +2369,12 @@ static int guc_request_alloc(struct i915_request *rq)
  	 * exhausted and return -EAGAIN to the user indicating that they can try
  	 * again in the future.
  	 *
-	 * There is no need for a lock here as the timeline mutex ensures at
-	 * most one context can be executing this code path at once. The
-	 * guc_id_ref is incremented once for every request in flight and
-	 * decremented on each retire. When it is zero, a lock around the
-	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
+	 * There is no need for a lock here as the timeline mutex (or
+	 * parallel_submit mutex in the case of multi-lrc) ensures at most one
+	 * context can be executing this code path at once. The guc_id_ref is
Isn't that now two contexts? One uni-LRC context holding the timeline mutex
and one multi-LRC context holding the parallel submit mutex?

John.

+	 * incremented once for every request in flight and decremented on each
+	 * retire. When it is zero, a lock around the increment (in pin_guc_id)
+	 * is needed to seal a race with unpin_guc_id.
  	 */
  	if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
  		goto out;