Re: [PATCH 20/27] drm/i915/guc: Rework and simplify locking

On 8/18/2021 11:16 PM, Matthew Brost wrote:
Rework and simplify the locking with GuC submission. Drop
sched_state_no_lock, move all fields under guc_state.sched_state, and
protect all of these fields with guc_state.lock. This requires changing
the locking hierarchy from guc_state.lock -> sched_engine.lock to
sched_engine.lock -> guc_state.lock.

Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
---
  drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +-
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 ++++++++----------
  drivers/gpu/drm/i915/i915_trace.h             |   6 +-
  3 files changed, 89 insertions(+), 108 deletions(-)
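
For anyone following the series: the new nesting this patch establishes
is sched_engine->lock (outer) -> ce->guc_state.lock (inner). A minimal
sketch of the ordering as guc_add_request() below exercises it
(illustrative only, not the full function):

	/* new hierarchy: sched_engine->lock first, guc_state.lock nested */
	spin_lock_irqsave(&sched_engine->lock, flags);
	spin_lock(&ce->guc_state.lock);
	/* ... update ce->guc_state.sched_state ... */
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);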

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index c06171ee8792..d5d643b04d54 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -161,7 +161,7 @@ struct intel_context {
  		 * sched_state: scheduling state of this context using GuC
  		 * submission
  		 */
-		u16 sched_state;
+		u32 sched_state;
  		/*
  		 * fences: maintains of list of requests that have a submit
  		 * fence related to GuC submission
@@ -178,9 +178,6 @@ struct intel_context {
  		struct list_head requests;
  	} guc_active;

-	/* GuC scheduling state flags that do not require a lock. */
-	atomic_t guc_sched_state_no_lock;
-
  	/* GuC LRC descriptor ID */
  	u16 guc_id;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 053f4485d6e9..509b298e7cf3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -72,86 +72,23 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
#define GUC_REQUEST_SIZE 64 /* bytes */

-/*
- * Below is a set of functions which control the GuC scheduling state which do
- * not require a lock as all state transitions are mutually exclusive. i.e. It
- * is not possible for the context pinning code and submission, for the same
- * context, to be executing simultaneously. We still need an atomic as it is
- * possible for some of the bits to changing at the same time though.
- */
-#define SCHED_STATE_NO_LOCK_ENABLED			BIT(0)
-#define SCHED_STATE_NO_LOCK_PENDING_ENABLE		BIT(1)
-#define SCHED_STATE_NO_LOCK_REGISTERED			BIT(2)
-static inline bool context_enabled(struct intel_context *ce)
-{
-	return (atomic_read(&ce->guc_sched_state_no_lock) &
-		SCHED_STATE_NO_LOCK_ENABLED);
-}
-
-static inline void set_context_enabled(struct intel_context *ce)
-{
-	atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_enabled(struct intel_context *ce)
-{
-	atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
-		   &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_pending_enable(struct intel_context *ce)
-{
-	return (atomic_read(&ce->guc_sched_state_no_lock) &
-		SCHED_STATE_NO_LOCK_PENDING_ENABLE);
-}
-
-static inline void set_context_pending_enable(struct intel_context *ce)
-{
-	atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
-		  &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_pending_enable(struct intel_context *ce)
-{
-	atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
-		   &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_registered(struct intel_context *ce)
-{
-	return (atomic_read(&ce->guc_sched_state_no_lock) &
-		SCHED_STATE_NO_LOCK_REGISTERED);
-}
-
-static inline void set_context_registered(struct intel_context *ce)
-{
-	atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
-		  &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_registered(struct intel_context *ce)
-{
-	atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
-		   &ce->guc_sched_state_no_lock);
-}
-
  /*
   * Below is a set of functions which control the GuC scheduling state which
- * require a lock, aside from the special case where the functions are called
- * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
- * path to be executing on the context.
+ * require a lock.
   */
  #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
  #define SCHED_STATE_DESTROYED				BIT(1)
  #define SCHED_STATE_PENDING_DISABLE			BIT(2)
  #define SCHED_STATE_BANNED				BIT(3)
-#define SCHED_STATE_BLOCKED_SHIFT			4
+#define SCHED_STATE_ENABLED				BIT(4)
+#define SCHED_STATE_PENDING_ENABLE			BIT(5)
+#define SCHED_STATE_REGISTERED				BIT(6)
+#define SCHED_STATE_BLOCKED_SHIFT			7
  #define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
  #define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)
  static inline void init_sched_state(struct intel_context *ce)
  {
  	lockdep_assert_held(&ce->guc_state.lock);
-	atomic_set(&ce->guc_sched_state_no_lock, 0);
  	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
  }
@@ -162,9 +99,8 @@ static bool sched_state_is_init(struct intel_context *ce)
  	 * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
  	 * suspend.
  	 */
-	return !(atomic_read(&ce->guc_sched_state_no_lock) &
-		 ~SCHED_STATE_NO_LOCK_REGISTERED) &&
-		!(ce->guc_state.sched_state &= ~SCHED_STATE_BLOCKED_MASK);
+	return !(ce->guc_state.sched_state &=
+		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
  }
static inline bool
@@ -237,6 +173,57 @@ static inline void clr_context_banned(struct intel_context *ce)
  	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
  }
+static inline bool context_enabled(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
+}
+
+static inline bool context_pending_enable(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void set_context_pending_enable(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void clr_context_pending_enable(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline bool context_registered(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
+}
+
+static inline void set_context_registered(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
+}
+
+static inline void clr_context_registered(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
+}
+
  static inline u32 context_blocked(struct intel_context *ce)
  {
  	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -245,7 +232,6 @@ static inline u32 context_blocked(struct intel_context *ce)
static inline void incr_context_blocked(struct intel_context *ce)
  {
-	lockdep_assert_held(&ce->engine->sched_engine->lock);
  	lockdep_assert_held(&ce->guc_state.lock);

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
@@ -255,7 +241,6 @@ static inline void incr_context_blocked(struct intel_context *ce)
static inline void decr_context_blocked(struct intel_context *ce)
  {
-	lockdep_assert_held(&ce->engine->sched_engine->lock);
  	lockdep_assert_held(&ce->guc_state.lock);

	GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
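
Side note for readers: with SCHED_STATE_BLOCKED_SHIFT now 7 and the mask
0xfff << 7, the block count is a 12-bit counter stored in sched_state
bits 18:7, so increment/decrement are plain additions and subtractions
of BIT(7). A sketch of the encoding (names from the patch):

	/* current block depth */
	u32 count = (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
		    SCHED_STATE_BLOCKED_SHIFT;

	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;	/* count + 1 */
	ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;	/* count - 1 */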
@@ -450,6 +435,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
  	u32 g2h_len_dw = 0;
  	bool enabled;

+	lockdep_assert_held(&rq->engine->sched_engine->lock);
+
  	/*
  	 * Corner case where requests were sitting in the priority list or a
  	 * request resubmitted after the context was banned.
@@ -457,7 +444,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
  	if (unlikely(intel_context_is_banned(ce))) {
  		i915_request_put(i915_request_mark_eio(rq));
  		intel_engine_signal_breadcrumbs(ce->engine);
-		goto out;
+		return 0;
  	}

	GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
@@ -470,9 +457,11 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
  	if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) {
  		err = guc_lrc_desc_pin(ce, false);
  		if (unlikely(err))
-			goto out;
+			return err;
  	}

+	spin_lock(&ce->guc_state.lock);
+
  	/*
  	 * The request / context will be run on the hardware when scheduling
  	 * gets enabled in the unblock.
@@ -507,6 +496,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
  		trace_i915_request_guc_submit(rq);

out:
+	spin_unlock(&ce->guc_state.lock);
  	return err;
  }
@@ -727,8 +717,6 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
  	spin_lock_irq(&guc_to_gt(guc)->irq_lock);
  	spin_unlock_irq(&guc_to_gt(guc)->irq_lock);

-	guc_flush_submissions(guc);
-
  	flush_work(&guc->ct.requests.worker);

	scrub_guc_desc_for_outstanding_g2h(guc);
@@ -1133,7 +1121,11 @@ static int steal_guc_id(struct intel_guc *guc)
		list_del_init(&ce->guc_id_link);
  		guc_id = ce->guc_id;
+
+		spin_lock(&ce->guc_state.lock);
  		clr_context_registered(ce);
+		spin_unlock(&ce->guc_state.lock);
+
  		set_context_guc_id_invalid(ce);
  		return guc_id;
  	} else {
@@ -1169,6 +1161,8 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
  try_again:
  	spin_lock_irqsave(&guc->contexts_lock, flags);

+	might_lock(&ce->guc_state.lock);
+
  	if (context_guc_id_invalid(ce)) {
  		ret = assign_guc_id(guc, &ce->guc_id);
  		if (ret)
@@ -1248,8 +1242,13 @@ static int register_context(struct intel_context *ce, bool loop)
  	trace_intel_context_register(ce);

	ret = __guc_action_register_context(guc, ce->guc_id, offset, loop);
-	if (likely(!ret))
+	if (likely(!ret)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
  		set_context_registered(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+	}

	return ret;
  }
@@ -1525,7 +1524,6 @@ static u16 prep_context_pending_disable(struct intel_context *ce)
  static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
  {
  	struct intel_guc *guc = ce_to_guc(ce);
-	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
  	unsigned long flags;
  	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
  	intel_wakeref_t wakeref;
@@ -1534,13 +1532,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
	spin_lock_irqsave(&ce->guc_state.lock, flags);

-	/*
-	 * Sync with submission path, increment before below changes to context
-	 * state.
-	 */
-	spin_lock(&sched_engine->lock);
  	incr_context_blocked(ce);
-	spin_unlock(&sched_engine->lock);

	enabled = context_enabled(ce);
  	if (unlikely(!enabled || submission_disabled(guc))) {
@@ -1569,7 +1561,6 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
  static void guc_context_unblock(struct intel_context *ce)
  {
  	struct intel_guc *guc = ce_to_guc(ce);
-	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
  	unsigned long flags;
  	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
  	intel_wakeref_t wakeref;
@@ -1594,13 +1585,7 @@ static void guc_context_unblock(struct intel_context *ce)
  		intel_context_get(ce);
  	}

-	/*
-	 * Sync with submission path, decrement after above changes to context
-	 * state.
-	 */
-	spin_lock(&sched_engine->lock);
  	decr_context_blocked(ce);
-	spin_unlock(&sched_engine->lock);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

@@ -1710,7 +1695,9 @@ static void guc_context_sched_disable(struct intel_context *ce)
 	if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
  	    !lrc_desc_registered(guc, ce->guc_id)) {
+		spin_lock_irqsave(&ce->guc_state.lock, flags);

We do take this lock a few lines below this. Would it be worth just moving that up and doing everything under the lock?
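
That is, something roughly like this (hypothetical, untested sketch;
only the start of guc_context_sched_disable() shown):

	spin_lock_irqsave(&ce->guc_state.lock, flags);
	if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
	    !lrc_desc_registered(guc, ce->guc_id)) {
		clr_context_enabled(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
		goto unpin;
	}
	/* ... rest of the function under the same lock ... */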

Anyway, all callers of the moved set/clr functions have been updated to take the lock correctly, so:

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>

Daniele

  		clr_context_enabled(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
  		goto unpin;
  	}
@@ -1760,7 +1747,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
  	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
  	GEM_BUG_ON(context_enabled(ce));

-	clr_context_registered(ce);
  	deregister_context(ce, ce->guc_id, true);
  }
@@ -1833,8 +1819,10 @@ static void guc_context_destroy(struct kref *kref)
  	/* Seal race with Reset */
  	spin_lock_irqsave(&ce->guc_state.lock, flags);
  	disabled = submission_disabled(guc);
-	if (likely(!disabled))
+	if (likely(!disabled)) {
  		set_context_destroyed(ce);
+		clr_context_registered(ce);
+	}
  	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
  	if (unlikely(disabled)) {
  		release_guc_id(guc, ce);
@@ -2697,8 +2685,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
  		     (!context_pending_enable(ce) &&
  		     !context_pending_disable(ce)))) {
  		drm_err(&guc_to_gt(guc)->i915->drm,
-			"Bad context sched_state 0x%x, 0x%x, desc_idx %u",
-			atomic_read(&ce->guc_sched_state_no_lock),
+			"Bad context sched_state 0x%x, desc_idx %u",
  			ce->guc_state.sched_state, desc_idx);
  		return -EPROTO;
  	}
@@ -2713,7 +2700,9 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
  		}
  #endif

+		spin_lock_irqsave(&ce->guc_state.lock, flags);
  		clr_context_pending_enable(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
  	} else if (context_pending_disable(ce)) {
  		bool banned;
@@ -2987,9 +2976,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
  			   atomic_read(&ce->pin_count));
  		drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
  			   atomic_read(&ce->guc_id_ref));
-		drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n",
-			   ce->guc_state.sched_state,
-			   atomic_read(&ce->guc_sched_state_no_lock));
+		drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
+			   ce->guc_state.sched_state);

		guc_log_context_priority(p, ce);
  	}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 806ad688274b..0a77eb2944b5 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -903,7 +903,6 @@ DECLARE_EVENT_CLASS(intel_context,
  			     __field(u32, guc_id)
  			     __field(int, pin_count)
  			     __field(u32, sched_state)
-			     __field(u32, guc_sched_state_no_lock)
  			     __field(u8, guc_prio)
  			     ),
@@ -911,15 +910,12 @@ DECLARE_EVENT_CLASS(intel_context,
  			   __entry->guc_id = ce->guc_id;
  			   __entry->pin_count = atomic_read(&ce->pin_count);
  			   __entry->sched_state = ce->guc_state.sched_state;
-			   __entry->guc_sched_state_no_lock =
-			   atomic_read(&ce->guc_sched_state_no_lock);
  			   __entry->guc_prio = ce->guc_prio;
  			   ),

-	    TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u",
+		    TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u",
  			      __entry->guc_id, __entry->pin_count,
  			      __entry->sched_state,
-			      __entry->guc_sched_state_no_lock,
  			      __entry->guc_prio)
  );



