On 03/06/16 17:08, Chris Wilson wrote:
When waiting for an interrupt (waiting for the GPU to complete some work), we know we are the single waiter for the GPU. We also know when the GPU has nearly completed our request (or at least started processing it), so after being woken and we detect that the GPU is almost finished,
We cannot detect GPU is almost finished, just that it has started processing the waited on batch. I suggest rewording the commit msg to be accurate.
allow the bottom-half to spin for a very short while to reduce client latencies.
Hm in fact I think it should explain that it is not actually adding the busy spin for the bottom half but extending it by 2us for the first waiter case.
The impact is minimal, there was an improvement to the realtime-vs-many clients case, but exporting the function proves useful later. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 26 +++++++++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 40 +++++++++++++++++++++--------------- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 48683538b4e2..0c287bf0d230 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -663,7 +663,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, engine->get_seqno(engine), - i915_gem_request_completed(work->flip_queued_req, true)); + i915_gem_request_completed(work->flip_queued_req)); } else seq_printf(m, "Flip not associated with any ring\n"); seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 68b383d98457..b0460eda2113 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3219,24 +3219,27 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) return (int32_t)(seq1 - seq2) >= 0; } -static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); return i915_seqno_passed(req->engine->get_seqno(req->engine), req->previous_seqno); } -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) +static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!lazy_coherency && req->engine->irq_seqno_barrier) - req->engine->irq_seqno_barrier(req->engine); return i915_seqno_passed(req->engine->get_seqno(req->engine), req->seqno); } +bool __i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); @@ -3913,6 +3916,8 @@ static inline void i915_trace_irq_get(struct intel_engine_cs *engine, static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) { + struct intel_engine_cs *engine = req->engine; + /* Ensure our read of the seqno is coherent so that we * do not "miss an interrupt" (i.e. if this is the last * request and the seqno write from the GPU is not visible @@ -3924,7 +3929,10 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * but it is easier and safer to do it every time the waiter * is woken. */ - if (i915_gem_request_completed(req, false)) + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + + if (i915_gem_request_completed(req)) return true; /* We need to check whether any gpu reset happened in between diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d08edb3d16f1..bf5c93f2bd81 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1155,9 +1155,9 @@ static bool busywait_stop(unsigned long timeout, unsigned cpu) return this_cpu != cpu; } -static bool __i915_spin_request(struct drm_i915_gem_request *req, int state) +bool __i915_spin_request(const struct drm_i915_gem_request *req, + int state, unsigned long timeout_us) { - unsigned long timeout; unsigned cpu; /* When waiting for high frequency requests, e.g. during synchronous @@ -1170,19 +1170,15 @@ static bool __i915_spin_request(struct drm_i915_gem_request *req, int state) * takes to sleep on a request, on the order of a microsecond. */ - /* Only spin if we know the GPU is processing this request */ - if (!i915_gem_request_started(req, true)) - return false; - - timeout = local_clock_us(&cpu) + 5; + timeout_us += local_clock_us(&cpu); do { - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return true; if (signal_pending_state(state, current)) break; - if (busywait_stop(timeout, cpu)) + if (busywait_stop(timeout_us, cpu)) break; cpu_relax_lowlatency(); @@ -1224,7 +1220,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (list_empty(&req->list)) return 0; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return 0; timeout_remain = MAX_SCHEDULE_TIMEOUT; @@ -1249,7 +1245,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, gen6_rps_boost(req->i915, rps, req->emitted_jiffies); /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (__i915_spin_request(req, state)) + if (i915_spin_request(req, state, 5)) goto complete; intel_wait_init(&wait, req->seqno); @@ -1290,6 +1286,10 @@ wakeup: */ if (__i915_request_irq_complete(req)) break; + + /* Only spin if we know the GPU is processing this request */ + if (i915_spin_request(req, state, 2)) + break;
The behaviour described in the comment is embedded in the function called. Or change to "Only spin*s* if we know.." ?
} remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); @@ -2805,8 +2805,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; + /* We are called by the error capture and reset at a random + * point in time. In particular, note that neither is crucially + * ordered with an interrupt. After a hang, the GPU is dead and we + * assume that no more writes can happen (we waited long enough for + * all writes that were in transaction to be flushed) - adding an + * extra delay for a recent interrupt is pointless. Hence, we do + * not need an engine->irq_seqno_barrier() before the seqno reads. + */ list_for_each_entry(request, &engine->request_list, list) { - if (i915_gem_request_completed(request, false)) + if (i915_gem_request_completed(request)) continue; return request; @@ -2937,7 +2945,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) struct drm_i915_gem_request, list); - if (!i915_gem_request_completed(request, true)) + if (!i915_gem_request_completed(request)) break; i915_gem_request_retire(request); @@ -2961,7 +2969,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) } if (unlikely(engine->trace_irq_req && - i915_gem_request_completed(engine->trace_irq_req, true))) { + i915_gem_request_completed(engine->trace_irq_req))) { engine->irq_put(engine); i915_gem_request_assign(&engine->trace_irq_req, NULL); } @@ -3058,7 +3066,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (req == NULL) continue; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) i915_gem_object_retire__read(obj, i); } @@ -3164,7 +3172,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (to == from) return 0; - if (i915_gem_request_completed(from_req, true)) + if (i915_gem_request_completed(from_req)) return 0; if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2bc291ac7243..bb09ee6d1a3f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11590,7 +11590,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv, vblank = intel_crtc_get_vblank_counter(intel_crtc); if (work->flip_ready_vblank == 0) { if (work->flip_queued_req && - !i915_gem_request_completed(work->flip_queued_req, true)) + !i915_gem_request_completed(work->flip_queued_req)) return false; work->flip_ready_vblank = vblank; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 657a64fc2780..712bd0debb91 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7687,7 +7687,7 @@ static void __intel_rps_boost_work(struct work_struct *work) struct request_boost *boost = container_of(work, struct request_boost, work); struct drm_i915_gem_request *req = boost->req; - if (!i915_gem_request_completed(req, true)) + if (!i915_gem_request_completed(req)) gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); i915_gem_request_unreference(req); @@ -7701,7 +7701,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) if (req == NULL || INTEL_GEN(req->i915) < 6) return; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return; boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
I am not such a big fan of spinning, but the code looks correct. Just please improve the commit message and that comment.
Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx