So had we just used the good old method for writing the seqno before the old fashioned MI_USER_INTERRUPT, we would have had coherent request completion - at least as far as the resolution of our testing goes, which is extremely good for this particular issue. Removing the PIPE_CONTROL notify for Ironlake removes a good chunk of special cased code and vfuncs. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_gem.c | 22 +++--- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 10 +++ drivers/gpu/drm/i915/intel_ringbuffer.c | 114 +++----------------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 21 +++--- 8 files changed, 44 insertions(+), 133 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4d0b5cff5291..1d385b399643 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -553,7 +553,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) seq_printf(m, "Flip queued on %s at seqno %u, next seqno %u [current breadcrumb %u], completed? %d\n", rq->engine->name, rq->seqno, rq->i915->next_seqno, - rq->engine->get_seqno(rq->engine), + intel_engine_get_seqno(rq->engine), __i915_request_complete__wa(rq)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -625,7 +625,7 @@ static void i915_engine_seqno_info(struct seq_file *m, { seq_printf(m, "Current sequence (%s): seqno=%u, tag=%u [last breadcrumb %u, last request %u], next seqno=%u, next tag=%u\n", engine->name, - engine->get_seqno(engine), + intel_engine_get_seqno(engine), engine->tag, engine->breadcrumb[engine->id], engine->last_request ? engine->last_request->seqno : 0, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 46d3aced7a50..dcb3e790cd24 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2148,20 +2148,20 @@ i915_gem_retire_requests__engine(struct intel_engine_cs *engine) if (engine->last_request == NULL) return; - if (!intel_engine_retire(engine, engine->get_seqno(engine))) + if (!intel_engine_retire(engine, intel_engine_get_seqno(engine))) return; - while (!list_empty(&engine->write_list)) { + while (!list_empty(&engine->read_list)) { struct drm_i915_gem_object *obj; - obj = list_first_entry(&engine->write_list, + obj = list_first_entry(&engine->read_list, struct drm_i915_gem_object, - last_write.engine_list); + last_read[engine->id].engine_list); - if (!obj->last_write.request->completed) + if (!obj->last_read[engine->id].request->completed) break; - i915_gem_object_retire__write(obj); + i915_gem_object_retire__read(obj, engine); } while (!list_empty(&engine->fence_list)) { @@ -2177,17 +2177,17 @@ i915_gem_retire_requests__engine(struct intel_engine_cs *engine) i915_gem_object_retire__fence(obj); } - while (!list_empty(&engine->read_list)) { + while (!list_empty(&engine->write_list)) { struct drm_i915_gem_object *obj; - obj = list_first_entry(&engine->read_list, + obj = list_first_entry(&engine->write_list, struct drm_i915_gem_object, - last_read[engine->id].engine_list); + last_write.engine_list); - if (!obj->last_read[engine->id].request->completed) + if (!obj->last_write.request->completed) break; - i915_gem_object_retire__read(obj, engine); + i915_gem_object_retire__write(obj); } } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index adb6358a8f6e..b596c9365818 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -914,7 +914,7 @@ static void i915_record_ring_state(struct drm_device *dev, ering->waiting = waitqueue_active(&engine->irq_queue); ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ering->acthd = intel_engine_get_active_head(engine); - ering->seqno = engine->get_seqno(engine); + ering->seqno = intel_engine_get_seqno(engine); ering->request = engine->last_request ? engine->last_request->seqno : 0; ering->hangcheck = engine->hangcheck.seqno; memcpy(ering->breadcrumb, engine->breadcrumb, sizeof(ering->breadcrumb)); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 71bdd9b3784f..daf1664e380d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3271,7 +3271,7 @@ static void i915_hangcheck_elapsed(unsigned long data) semaphore_clear_deadlocks(dev_priv); acthd = intel_engine_get_active_head(engine); - seqno = engine->get_seqno(engine); + seqno = intel_engine_get_seqno(engine); interrupts = atomic_read(&engine->interrupts); if (engine_idle(engine)) { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 8bb51dcb10f3..195ce54d7694 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -474,7 +474,7 @@ TRACE_EVENT(i915_gem_ring_complete, TP_fast_assign( __entry->dev = ring->i915->dev->primary->index; __entry->ring = ring->id; - __entry->seqno = ring->get_seqno(ring); + __entry->seqno = intel_engine_get_seqno(ring); ), TP_printk("dev=%u, ring=%u, seqno=%x", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d47af931d5ab..7959b7b1f531 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -643,6 +643,15 @@ static int execlists_resume(struct intel_engine_cs *engine) /* XXX */ I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + + /* We need to disable the AsyncFlip performance optimisations in order + * to use MI_WAIT_FOR_EVENT within the CS. It should already be + * programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:bdw + */ + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); I915_WRITE(RING_MODE_GEN7(engine), _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | @@ -674,6 +683,7 @@ static void execlists_reset(struct intel_engine_cs *engine) spin_lock_irqsave(&engine->irqlock, flags); list_splice_tail_init(&engine->pending, &engine->submitted); + list_splice_tail_init(&engine->submitted, &engine->requests); spin_unlock_irqrestore(&engine->irqlock, flags); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ae02b1757745..0147bd04c8e8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -666,7 +666,7 @@ static int engine_add_request(struct i915_gem_request *rq) static bool engine_rq_is_complete(struct i915_gem_request *rq) { - return __i915_seqno_passed(rq->engine->get_seqno(rq->engine), + return __i915_seqno_passed(intel_engine_get_seqno(rq->engine), rq->seqno); } @@ -816,7 +816,7 @@ static int chv_render_init_context(struct i915_gem_request *rq) { int ret; - ret = emit_lri(rq, 8, + ret = emit_lri(rq, 3, GEN8_ROW_CHICKEN, /* WaDisablePartialInstShootdown:chv */ @@ -1058,89 +1058,6 @@ gen6_emit_wait(struct i915_gem_request *waiter, return 0; } -#define PIPE_CONTROL_FLUSH(ring__, addr__) \ -do { \ - intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \ - PIPE_CONTROL_DEPTH_STALL); \ - intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ - intel_ring_emit(ring__, 0); \ - intel_ring_emit(ring__, 0); \ -} while (0) - -static int -gen5_emit_breadcrumb(struct i915_gem_request *rq) -{ - u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; - struct intel_ringbuffer *ring; - - /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently - * incoherent with writes to memory, i.e. completely fubar, - * so we need to use PIPE_NOTIFY instead. - * - * However, we also need to workaround the qword write - * incoherence by flushing the 6 PIPE_NOTIFY buffers out to - * memory before requesting an interrupt. - */ - ring = intel_ring_begin(rq, 32); - if (IS_ERR(ring)) - return PTR_ERR(ring); - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); - intel_ring_emit(ring, rq->engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, rq->seqno); - intel_ring_emit(ring, 0); - - PIPE_CONTROL_FLUSH(ring, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ - PIPE_CONTROL_FLUSH(ring, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(ring, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(ring, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(ring, scratch_addr); - scratch_addr += 2 * CACHELINE_BYTES; - PIPE_CONTROL_FLUSH(ring, scratch_addr); - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_NOTIFY); - intel_ring_emit(ring, rq->engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, rq->seqno); - intel_ring_emit(ring, 0); - - intel_ring_advance(ring); - - return 0; -} - -static u32 -ring_get_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); -} - -static void -ring_set_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); -} - -static u32 -gen5_render_get_seqno(struct intel_engine_cs *engine) -{ - return engine->scratch.cpu_page[0]; -} - -static void -gen5_render_set_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - engine->scratch.cpu_page[0] = seqno; -} - static bool gen5_irq_get(struct intel_engine_cs *engine) { @@ -1256,7 +1173,7 @@ bsd_emit_flush(struct i915_gem_request *rq, } static int -i9xx_emit_breadcrumb(struct i915_gem_request *rq) +emit_breadcrumb(struct i915_gem_request *rq) { struct intel_ringbuffer *ring; @@ -1332,8 +1249,6 @@ hsw_vebox_irq_get(struct intel_engine_cs *engine) if (!dev_priv->dev->irq_enabled) return false; - gen6_gt_force_wake_get(dev_priv, engine->power_domains); - spin_lock_irqsave(&dev_priv->irq_lock, flags); if (engine->irq_refcount++ == 0) { I915_WRITE_IMR(engine, @@ -1343,6 +1258,8 @@ hsw_vebox_irq_get(struct intel_engine_cs *engine) } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + gen6_gt_force_wake_get(dev_priv, engine->power_domains); + return true; } @@ -1352,14 +1269,14 @@ hsw_vebox_irq_put(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; unsigned long flags; + gen6_gt_force_wake_put(dev_priv, engine->power_domains); + spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--engine->irq_refcount == 0) { I915_WRITE_IMR(engine, ~engine->irq_keep_mask); gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask); } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - - gen6_gt_force_wake_put(dev_priv, engine->power_domains); } static bool @@ -1668,10 +1585,8 @@ static int intel_engine_init(struct intel_engine_cs *engine, engine->resume = engine_resume; engine->cleanup = engine_cleanup; - engine->get_seqno = ring_get_seqno; - engine->set_seqno = ring_set_seqno; - engine->irq_barrier = nop_irq_barrier; + engine->emit_breadcrumb = emit_breadcrumb; engine->get_ring = engine_get_ring; engine->put_ring = engine_put_ring; @@ -1947,14 +1862,12 @@ int intel_init_render_engine(struct drm_i915_private *dev_priv) engine->init_context = chv_render_init_context; else engine->init_context = bdw_render_init_context; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_flush = gen8_render_emit_flush; engine->irq_get = gen8_irq_get; engine->irq_put = gen8_irq_put; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; gen8_engine_init_semaphore(engine); } else if (INTEL_INFO(dev_priv)->gen >= 6) { - engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_flush = gen7_render_emit_flush; if (INTEL_INFO(dev_priv)->gen == 6) engine->emit_flush = gen6_render_emit_flush; @@ -1984,17 +1897,13 @@ int intel_init_render_engine(struct drm_i915_private *dev_priv) engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; } } else if (IS_GEN5(dev_priv)) { - engine->emit_breadcrumb = gen5_emit_breadcrumb; engine->emit_flush = gen4_emit_flush; - engine->get_seqno = gen5_render_get_seqno; - engine->set_seqno = gen5_render_set_seqno; engine->irq_get = gen5_irq_get; engine->irq_put = gen5_irq_put; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; } else { - engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (INTEL_INFO(dev_priv)->gen < 4) engine->emit_flush = gen2_emit_flush; else @@ -2044,7 +1953,7 @@ int intel_init_render_engine(struct drm_i915_private *dev_priv) engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); } - if (INTEL_INFO(dev_priv)->gen >= 5) { + if (INTEL_INFO(dev_priv)->gen >= 6) { ret = init_pipe_control(engine); if (ret) return ret; @@ -2073,7 +1982,6 @@ int intel_init_bsd_engine(struct drm_i915_private *dev_priv) if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; engine->emit_flush = gen6_bsd_emit_flush; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (INTEL_INFO(dev_priv)->gen >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; @@ -2106,7 +2014,6 @@ int intel_init_bsd_engine(struct drm_i915_private *dev_priv) engine->mmio_base = BSD_RING_BASE; engine->emit_flush = bsd_emit_flush; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (IS_GEN5(dev_priv)) { engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; engine->irq_get = gen5_irq_get; @@ -2146,7 +2053,6 @@ int intel_init_bsd2_engine(struct drm_i915_private *dev_priv) engine->mmio_base = GEN8_BSD2_RING_BASE; engine->emit_flush = gen6_bsd_emit_flush; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_batchbuffer = gen8_emit_batchbuffer; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; @@ -2172,7 +2078,6 @@ int intel_init_blt_engine(struct drm_i915_private *dev_priv) engine->mmio_base = BLT_RING_BASE; engine->emit_flush = gen6_blt_emit_flush; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (INTEL_INFO(dev_priv)->gen >= 8) { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; @@ -2227,7 +2132,6 @@ int intel_init_vebox_engine(struct drm_i915_private *dev_priv) engine->mmio_base = VEBOX_RING_BASE; engine->emit_flush = gen6_blt_emit_flush; - engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (INTEL_INFO(dev_priv)->gen >= 8) { engine->irq_enable_mask = diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 46c8d2288821..1289714351dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -21,8 +21,9 @@ * cacheline, the Head Pointer must not be greater than the Tail * Pointer." * - * To also accommodate errata on 830/845 which makes the last pair of cachlines - * in the ringbuffer unavailable, reduce the available space further. + * To also accommodate errata on 830/845 which makes the last pair of + * cachelines in the ringbuffer unavailable, reduce the available space + * further. */ #define I915_RING_RSVD (2*CACHELINE_BYTES) @@ -177,16 +178,6 @@ struct intel_engine_cs { int (*resume)(struct intel_engine_cs *engine); void (*cleanup)(struct intel_engine_cs *engine); - /* Some chipsets are not quite as coherent as advertised and need - * an expensive kick to force a true read of the up-to-date seqno. - * However, the up-to-date seqno is not always required and the last - * seen value is good enough. Note that the seqno will always be - * monotonic, even if not coherent. - */ - u32 (*get_seqno)(struct intel_engine_cs *engine); - void (*set_seqno)(struct intel_engine_cs *engine, - u32 seqno); - int (*init_context)(struct i915_gem_request *rq); int __must_check (*emit_flush)(struct i915_gem_request *rq, @@ -371,6 +362,12 @@ intel_write_status_page(struct intel_engine_cs *engine, #define I915_GEM_HWS_SCRATCH_INDEX 0x30 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +static inline u32 +intel_engine_get_seqno(struct intel_engine_cs *engine) +{ + return intel_read_status_page(engine, I915_GEM_HWS_INDEX); +} + struct intel_ringbuffer * intel_engine_alloc_ring(struct intel_engine_cs *engine, struct intel_context *ctx, -- 2.1.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx