[CI] drm/i915: Reduce i915_request.lock contention for i915_request_wait

Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> · Thu, 16 Jul 2020 11:07:54 +0100

Currently, we use i915_request_completed() directly in
i915_request_wait() and follow up with a manual invocation of
dma_fence_signal(). This appears to cause a large number of contentions
on i915_request.lock as when the process is woken up after the fence is
signaled by an interrupt, we will then try and call dma_fence_signal()
ourselves while the signaler is still holding the lock.
dma_fence_is_signaled() has the benefit of checking the
DMA_FENCE_FLAG_SIGNALED_BIT prior to calling dma_fence_signal() and so
avoids most of that contention.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Matthew Auld <matthew.auld@xxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_request.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0b2fe55e6194..49f181ab4982 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1640,7 +1640,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 	return this_cpu != cpu;
 }
 
-static bool __i915_spin_request(const struct i915_request * const rq, int state)
+static bool __i915_spin_request(struct i915_request * const rq, int state)
 {
 	unsigned long timeout_ns;
 	unsigned int cpu;
@@ -1673,7 +1673,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state)
 	timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
 	timeout_ns += local_clock_ns(&cpu);
 	do {
-		if (i915_request_completed(rq))
+		if (dma_fence_is_signaled(&rq->fence))
 			return true;
 
 		if (signal_pending_state(state, current))
@@ -1697,7 +1697,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct request_wait *wait = container_of(cb, typeof(*wait), cb);
 
-	wake_up_process(wait->tsk);
+	wake_up_process(fetch_and_zero(&wait->tsk));
 }
 
 /**
@@ -1766,10 +1766,8 @@ long i915_request_wait(struct i915_request *rq,
 	 * duration, which we currently lack.
 	 */
 	if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
-	    __i915_spin_request(rq, state)) {
-		dma_fence_signal(&rq->fence);
+	    __i915_spin_request(rq, state))
 		goto out;
-	}
 
 	/*
 	 * This client is about to stall waiting for the GPU. In many cases
@@ -1796,10 +1794,8 @@ long i915_request_wait(struct i915_request *rq,
 	for (;;) {
 		set_current_state(state);
 
-		if (i915_request_completed(rq)) {
-			dma_fence_signal(&rq->fence);
+		if (dma_fence_is_signaled(&rq->fence))
 			break;
-		}
 
 		intel_engine_flush_submission(rq->engine);
 
@@ -1817,7 +1813,9 @@ long i915_request_wait(struct i915_request *rq,
 	}
 	__set_current_state(TASK_RUNNING);
 
-	dma_fence_remove_callback(&rq->fence, &wait.cb);
+	if (READ_ONCE(wait.tsk))
+		dma_fence_remove_callback(&rq->fence, &wait.cb);
+	GEM_BUG_ON(!list_empty(&wait.cb.node));
 
 out:
 	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx