Now that we have eliminated the CPU-side irq_seqno_barrier by moving the delays on the GPU before emitting the MI_USER_INTERRUPT, we can remove the engine->irq_seqno_barrier infrastructure. Though intentionally slowing down the GPU is nasty, so is the code we can now remove! Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 84 ------------------------ drivers/gpu/drm/i915/i915_gem.c | 7 -- drivers/gpu/drm/i915/i915_irq.c | 7 -- drivers/gpu/drm/i915/i915_request.c | 8 +-- drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 -------- drivers/gpu/drm/i915/intel_engine_cs.c | 6 -- drivers/gpu/drm/i915/intel_hangcheck.c | 10 --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 -- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 --- 9 files changed, 1 insertion(+), 161 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7da653ece4dd..2ce731d987f1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3584,90 +3584,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } } -static inline bool -__i915_request_irq_complete(const struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - u32 seqno; - - /* Note that the engine may have wrapped around the seqno, and - * so our request->global_seqno will be ahead of the hardware, - * even though it completed the request before wrapping. We catch - * this by kicking all the waiters before resetting the seqno - * in hardware, and also signal the fence. - */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) - return true; - - /* The request was dequeued before we were awoken. We check after - * inspecting the hw to confirm that this was the same request - * that generated the HWS update. The memory barriers within - * the request execution are sufficient to ensure that a check - * after reading the value from hw matches this request. - */ - seqno = i915_request_global_seqno(rq); - if (!seqno) - return false; - - /* Before we do the heavier coherent read of the seqno, - * check the value (hopefully) in the CPU cacheline. - */ - if (__i915_request_completed(rq, seqno)) - return true; - - /* Ensure our read of the seqno is coherent so that we - * do not "miss an interrupt" (i.e. if this is the last - * request and the seqno write from the GPU is not visible - * by the time the interrupt fires, we will see that the - * request is incomplete and go back to sleep awaiting - * another interrupt that will never come.) - * - * Strictly, we only need to do this once after an interrupt, - * but it is easier and safer to do it every time the waiter - * is woken. - */ - if (engine->irq_seqno_barrier && - test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - /* The ordering of irq_posted versus applying the barrier - * is crucial. The clearing of the current irq_posted must - * be visible before we perform the barrier operation, - * such that if a subsequent interrupt arrives, irq_posted - * is reasserted and our task rewoken (which causes us to - * do another __i915_request_irq_complete() immediately - * and reapply the barrier). Conversely, if the clear - * occurs after the barrier, then an interrupt that arrived - * whilst we waited on the barrier would not trigger a - * barrier on the next pass, and the read may not see the - * seqno update. - */ - engine->irq_seqno_barrier(engine); - - /* If we consume the irq, but we are no longer the bottom-half, - * the real bottom-half may not have serialised their own - * seqno check with the irq-barrier (i.e. may have inspected - * the seqno before we believe it coherent since they see - * irq_posted == false but we are still running). - */ - spin_lock_irq(&b->irq_lock); - if (b->irq_wait && b->irq_wait->tsk != current) - /* Note that if the bottom-half is changed as we - * are sending the wake-up, the new bottom-half will - * be woken by whomever made the change. We only have - * to worry about when we steal the irq-posted for - * ourself. - */ - wake_up_process(b->irq_wait->tsk); - spin_unlock_irq(&b->irq_lock); - - if (__i915_request_completed(rq, seqno)) - return true; - } - - return false; -} - void i915_memcpy_init_early(struct drm_i915_private *dev_priv); bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f4254e012620..7ea87c7a0e8c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3230,13 +3230,6 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, struct i915_request *request, bool stalled) { - /* - * Make sure this write is visible before we re-enable the interrupt - * handlers on another CPU, as tasklet_enable() resolves to just - * a compiler barrier which is insufficient for our purpose here. - */ - smp_store_mb(engine->irq_posted, 0); - if (request) request = i915_gem_reset_request(engine, request, stalled); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0c7fc9890891..fbb094ecf6c9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1189,13 +1189,6 @@ static void notify_ring(struct intel_engine_cs *engine) rq = i915_request_get(waiter); tsk = wait->tsk; - } else { - if (engine->irq_seqno_barrier && - i915_seqno_passed(seqno, wait->seqno - 1)) { - set_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted); - tsk = wait->tsk; - } } engine->breadcrumbs.irq_count++; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 8f8e844f2e85..432deadea584 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1178,13 +1178,7 @@ long i915_request_wait(struct i915_request *rq, set_current_state(state); wakeup: - /* - * Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(rq)) + if (i915_request_completed(rq)) break; /* diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 447c5256f63a..4ed7105d7ff5 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -166,12 +166,6 @@ static void irq_enable(struct intel_engine_cs *engine) */ GEM_BUG_ON(!intel_irqs_enabled(engine->i915)); - /* Enabling the IRQ may miss the generation of the interrupt, but - * we still need to force the barrier before reading the seqno, - * just in case. - */ - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - /* Caller disables interrupts */ if (engine->irq_enable) { spin_lock(&engine->i915->irq_lock); @@ -683,16 +677,6 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - /* Before we sleep, check for a missed seqno */ - if (current->state & TASK_NORMAL && - !list_empty(&b->signals) && - engine->irq_seqno_barrier && - test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)) { - engine->irq_seqno_barrier(engine); - intel_engine_wakeup(engine); - } - sleep: if (kthread_should_park()) kthread_parkme(); @@ -859,16 +843,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) else irq_disable(engine); - /* - * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the - * GPU is active and may have already executed the MI_USER_INTERRUPT - * before the CPU is ready to receive. However, the engine is currently - * idle (we haven't started it yet), there is no possibility for a - * missed interrupt as we enabled the irq and so we can clear the - * immediate wakeup (until a real interrupt arrives for the waiter). - */ - clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - spin_unlock_irqrestore(&b->irq_lock, flags); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1462bb49f420..189a934a63e9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -457,7 +457,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv) void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* After manually advancing the seqno, fake the interrupt in case * there are any waiters for that seqno. @@ -1536,11 +1535,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock(&b->rb_lock); local_irq_restore(flags); - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n", - engine->irq_posted, - yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted))); - drm_printf(m, "HWSP:\n"); hexdump(m, engine->status_page.page_addr, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index c3f929f59424..51e9efec5116 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -120,16 +120,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) static void hangcheck_load_sample(struct intel_engine_cs *engine, struct intel_engine_hangcheck *hc) { - /* We don't strictly need an irq-barrier here, as we are not - * serving an interrupt request, be paranoid in case the - * barrier has side-effects (such as preventing a broken - * cacheline snoop) and so be sure that we can see the seqno - * advance. If the seqno should stick, due to a stale - * cacheline, we would erroneously declare the GPU hung. - */ - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - hc->acthd = intel_engine_get_active_head(engine); hc->seqno = intel_engine_get_seqno(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 323e1e2c1e5e..f8c35cf83fc2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -711,10 +711,6 @@ static int init_ring_common(struct intel_engine_cs *engine) static struct i915_request *reset_prepare(struct intel_engine_cs *engine) { intel_engine_stop_cs(engine); - - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - return i915_gem_find_active_request(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 99e2cb75d29a..91ef00d34e91 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -365,9 +365,6 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; void *pinned_default_state; - unsigned long irq_posted; -#define ENGINE_IRQ_BREADCRUMB 0 - /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -501,13 +498,6 @@ struct intel_engine_cs { */ void (*cancel_requests)(struct intel_engine_cs *engine); - /* Some chipsets are not quite as coherent as advertised and need - * an expensive kick to force a true read of the up-to-date seqno. - * However, the up-to-date seqno is not always required and the last - * seen value is good enough. Note that the seqno will always be - * monotonic, even if not coherent. - */ - void (*irq_seqno_barrier)(struct intel_engine_cs *engine); void (*cleanup)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; -- 2.20.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx