Re: [Intel-gfx] [RFC PATCH 55/97] drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 25/05/2021 18:07, Matthew Brost wrote:
On Tue, May 25, 2021 at 11:06:00AM +0100, Tvrtko Ursulin wrote:

On 06/05/2021 20:14, Matthew Brost wrote:
When running the GuC the GPU can't be considered idle if the GuC still
has contexts pinned. As such, a call has been added in
intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for
the number of unpinned contexts to go to zero.

Cc: John Harrison <john.c.harrison@xxxxxxxxx>
Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
---
   drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  3 +-
   drivers/gpu/drm/i915/gt/intel_gt.c            | 18 ++++
   drivers/gpu/drm/i915/gt/intel_gt.h            |  2 +
   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 22 ++---
   drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  7 +-
   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  4 +
   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c     |  1 +
   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h     |  4 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++-
   drivers/gpu/drm/i915/gt/uc/intel_uc.h         |  5 +
   drivers/gpu/drm/i915/i915_debugfs.c           |  1 +
   drivers/gpu/drm/i915/i915_gem_evict.c         |  1 +
   .../gpu/drm/i915/selftests/igt_live_test.c    |  2 +-
   .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
   14 files changed, 137 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 8598a1c78a4c..2f5295c9408d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
   		goto insert;
   	/* Attempt to reap some mmap space from dead objects */
-	err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT,
+					       NULL);
   	if (err)
   		goto err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8d77dcbad059..1742a8561f69 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt)
   	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
   }
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+	long rtimeout;
+
+	/* If the device is asleep, we have no requests outstanding */
+	if (!intel_gt_pm_is_awake(gt))
+		return 0;
+
+	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
+							   &rtimeout)) > 0) {
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc, rtimeout);
+}
+
   int intel_gt_init(struct intel_gt *gt)
   {
   	int err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 7ec395cace69..c775043334bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt);
   void intel_gt_driver_late_release(struct intel_gt *gt);
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
   void intel_gt_check_and_clear_faults(struct intel_gt *gt);
   void intel_gt_clear_error_registers(struct intel_gt *gt,
   				    intel_engine_mask_t engine_mask);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 647eca9d867a..c6c702f236fa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -13,6 +13,7 @@
   #include "intel_gt_pm.h"
   #include "intel_gt_requests.h"
   #include "intel_timeline.h"
+#include "uc/intel_uc.h"
   static bool retire_requests(struct intel_timeline *tl)
   {
@@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine)
   	GEM_BUG_ON(engine->retire);
   }
-long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout,
+				      long *rtimeout)

What is 'rtimeout', I know remaining, but it can be more self-descriptive to
start with.


'remaining_timeout' it is.

It feels a bit churny for what it is. How plausible would be alternatives to
either change existing timeout to in/out, or measure sleep internally in
this function, or just risk sleeping twice as long by passing the original
timeout to uc idle as well?


Originally had it just passing in the same value, got review feedback
saying I should pass in the adjusted value. Hard to make everyone happy.

Ok.

   {
   	struct intel_gt_timelines *timelines = &gt->timelines;
   	struct intel_timeline *tl, *tn;
@@ -195,22 +197,10 @@ out_active:	spin_lock(&timelines->lock);
   	if (flush_submission(gt, timeout)) /* Wait, there's more! */
   		active_count++;
-	return active_count ? timeout : 0;
-}
-
-int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
-{
-	/* If the device is asleep, we have no requests outstanding */
-	if (!intel_gt_pm_is_awake(gt))
-		return 0;
-
-	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
-		cond_resched();
-		if (signal_pending(current))
-			return -EINTR;
-	}
+	if (rtimeout)
+		*rtimeout = timeout;
-	return timeout;
+	return active_count ? timeout : 0;
   }
   static void retire_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index fcc30a6e4fe9..4419787124e2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -10,10 +10,11 @@ struct intel_engine_cs;
   struct intel_gt;
   struct intel_timeline;
-long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout,
+				      long *rtimeout);
   static inline void intel_gt_retire_requests(struct intel_gt *gt)
   {
-	intel_gt_retire_requests_timeout(gt, 0);
+	intel_gt_retire_requests_timeout(gt, 0, NULL);
   }
   void intel_engine_init_retire(struct intel_engine_cs *engine);
@@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine,
   			     struct intel_timeline *tl);
   void intel_engine_fini_retire(struct intel_engine_cs *engine);
-int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
-
   void intel_gt_init_requests(struct intel_gt *gt);
   void intel_gt_park_requests(struct intel_gt *gt);
   void intel_gt_unpark_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 485e98f3f304..47eaa69809e8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -38,6 +38,8 @@ struct intel_guc {
   	spinlock_t irq_lock;
   	unsigned int msg_enabled_mask;
+	atomic_t outstanding_submission_g2h;
+
   	struct {
   		bool enabled;
   		void (*reset)(struct intel_guc *guc);
@@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
   	spin_unlock_irq(&guc->irq_lock);
   }
+int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout);
+
   int intel_guc_reset_engine(struct intel_guc *guc,
   			   struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f1893030ca88..cf701056fa14 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
   	INIT_LIST_HEAD(&ct->requests.incoming);
   	INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
   	tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct);
+	init_waitqueue_head(&ct->wq);
   }
   static inline const char *guc_ct_buffer_type_to_str(u32 type)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 660bf37238e2..ab1b79ab960b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -10,6 +10,7 @@
   #include <linux/spinlock.h>
   #include <linux/workqueue.h>
   #include <linux/ktime.h>
+#include <linux/wait.h>
   #include "intel_guc_fwif.h"
@@ -68,6 +69,9 @@ struct intel_guc_ct {
   	struct tasklet_struct receive_tasklet;
+	/** @wq: wait queue for g2h chanenl */
+	wait_queue_head_t wq;
+
   	struct {
   		u16 last_fence; /* last fence used to send request */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index ae0b386467e3..0ff7dd6d337d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
   	xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC);
   }
+static int guc_submission_busy_loop(struct intel_guc* guc,
+				    const u32 *action,
+				    u32 len,
+				    u32 g2h_len_dw,
+				    bool loop)
+{
+	int err;
+
+	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+
+	if (!err && g2h_len_dw)
+		atomic_inc(&guc->outstanding_submission_g2h);
+
+	return err;
+}
+
+static int guc_wait_for_pending_msg(struct intel_guc *guc,
+				    atomic_t *wait_var,
+				    bool interruptible,
+				    long timeout)
+{
+	const int state = interruptible ?
+		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	DEFINE_WAIT(wait);
+
+	might_sleep();
+	GEM_BUG_ON(timeout < 0);
+
+	if (!atomic_read(wait_var))
+		return 0;
+
+	if (!timeout)
+		return -ETIME;
+
+	for (;;) {
+		prepare_to_wait(&guc->ct.wq, &wait, state);
+
+		if (!atomic_read(wait_var))
+			break;
+
+		if (signal_pending_state(state, current)) {
+			timeout = -ERESTARTSYS;
+			break;
+		}
+
+		if (!timeout) {
+			timeout = -ETIME;
+			break;
+		}
+
+		timeout = io_schedule_timeout(timeout);
+	}
+	finish_wait(&guc->ct.wq, &wait);
+
+	return (timeout < 0) ? timeout : 0;
+}

See if it is possible to simplify all this with wait_var_event and
wake_up_var.


Let me check on that.
+
+int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
+{
+	bool interruptible = true;
+
+	if (unlikely(timeout < 0))
+		timeout = -timeout, interruptible = false;
+
+	return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h,
+					interruptible, timeout);
+}
+
   static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
   {
   	int err;
@@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
   	err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
   	if (!enabled && !err) {
+		atomic_inc(&guc->outstanding_submission_g2h);
   		set_context_enabled(ce);
   	} else if (!enabled) {
   		clr_context_pending_enable(ce);
@@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc,
   		offset,
   	};
-	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+	return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
   }
   static int register_context(struct intel_context *ce)
@@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc,
   		guc_id,
   	};
-	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action),
+	return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action),
   					G2H_LEN_DW_DEREGISTER_CONTEXT, true);
   }
@@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr)
   static void guc_context_unpin(struct intel_context *ce)
   {
-	unpin_guc_id(ce_to_guc(ce), ce);
+	struct intel_guc *guc = ce_to_guc(ce);
+
+	unpin_guc_id(guc, ce);
   	lrc_unpin(ce);
   }
@@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
   	intel_context_get(ce);
-	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action),
+	guc_submission_busy_loop(guc, action, ARRAY_SIZE(action),
   				 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
   }
@@ -1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
   	return ce;
   }
+static void decr_outstanding_submission_g2h(struct intel_guc *guc)
+{
+	if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) {
+		smp_mb();
+		if (waitqueue_active(&guc->ct.wq))
+			wake_up_all(&guc->ct.wq);

I keep pointing out this pattern is racy and at least needs comment why it
is safe.


There is a comment in wake queue code header saying why this is safe. I
don't think we need to repeat this here.

Yeah, _describing how to make it safe_, after it starts with:

 * NOTE: this function is lockless and requires care, incorrect usage _will_
 * lead to sporadic and non-obvious failure.

Then it also says:

 * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
 * which (when the lock is uncontended) are of roughly equal cost.

I question the need to optimize this path since it means reader has to figure out if it is safe while a simple wake_up_all after atomic_dec_and_test would have done it.

Is the case of no waiters a predominant one? It at least deserves a comment explaining why the optimisation is important.

Regards,

Tvrtko


Matt

Regards,

Tvrtko

+	}
+}
+
   int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
   					  const u32 *msg,
   					  u32 len)
@@ -1472,6 +1552,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
   		lrc_destroy(&ce->ref);
   	}
+	decr_outstanding_submission_g2h(guc);
+
   	return 0;
   }
@@ -1520,6 +1602,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
   		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
   	}
+	decr_outstanding_submission_g2h(guc);
   	intel_context_put(ce);
   	return 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 9c954c589edf..c4cef885e984 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission);
   #undef uc_state_checkers
   #undef __uc_state_checker
+static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout)
+{
+	return intel_guc_wait_for_idle(&uc->guc, timeout);
+}
+
   #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
   static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
   { \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8dd374691102..bb29838d1cd7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -36,6 +36,7 @@
   #include "gt/intel_gt_clock_utils.h"
   #include "gt/intel_gt.h"
   #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt.h"
   #include "gt/intel_gt_requests.h"
   #include "gt/intel_reset.h"
   #include "gt/intel_rc6.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 4d2d59a9942b..2b73ddb11c66 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -27,6 +27,7 @@
    */
   #include "gem/i915_gem_context.h"
+#include "gt/intel_gt.h"
   #include "gt/intel_gt_requests.h"
   #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index c130010a7033..1c721542e277 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -5,7 +5,7 @@
    */
   #include "i915_drv.h"
-#include "gt/intel_gt_requests.h"
+#include "gt/intel_gt.h"
   #include "../i915_selftest.h"
   #include "igt_flush_test.h"
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index cf40004bc92a..6c06816e2b99 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -51,7 +51,8 @@ void mock_device_flush(struct drm_i915_private *i915)
   	do {
   		for_each_engine(engine, gt, id)
   			mock_engine_flush(engine);
-	} while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT));
+	} while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT,
+						  NULL));
   }
   static void mock_device_release(struct drm_device *dev)




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux