Space for flushing the GPU cache prior to completing the request is
preallocated and so cannot fail.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +---
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 18 ++++----
 drivers/gpu/drm/i915/i915_gem_request.c    |  7 ++-
 drivers/gpu/drm/i915/intel_lrc.c           | 47 +++----------------
 drivers/gpu/drm/i915/intel_lrc.h           |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 72 +++++++-----------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  7 ---
 8 files changed, 39 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 17fe8ed991d6..c078ebc29da5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -534,7 +534,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	 * itlb_before_ctx_switch.
 	 */
 	if (IS_GEN6(req->i915)) {
-		ret = req->engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
+		ret = req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 731ce13dbdbc..a56fae99a1bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -969,10 +969,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return intel_engine_invalidate_all_caches(req);
+	/* Unconditionally invalidate gpu caches and TLBs. */
+	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 }
 
 static bool
@@ -1138,9 +1136,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 static void
 i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
 {
-	/* Unconditionally force add_request to emit a full flush. */
-	params->ring->gpu_caches_dirty = true;
-
 	/* Add a breadcrumb for the completion of the batch buffer */
 	__i915_add_request(params->request, params->batch_obj, true);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9a91451d66ac..cddbd8c00663 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1652,9 +1652,9 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = req->engine->flush(req,
-				 I915_GEM_GPU_DOMAINS,
-				 I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -1690,9 +1690,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = req->engine->flush(req,
-				 I915_GEM_GPU_DOMAINS,
-				 I915_GEM_GPU_DOMAINS);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -1710,9 +1710,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
 	if (req->engine->id != RCS) {
-		ret = req->engine->flush(req,
-					 I915_GEM_GPU_DOMAINS,
-					 I915_GEM_GPU_DOMAINS);
+		ret = req->engine->emit_flush(req,
+					      I915_GEM_GPU_DOMAINS,
+					      I915_GEM_GPU_DOMAINS);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e1f2af046b6c..e911430575fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -426,10 +426,9 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 * what.
 	 */
 	if (flush_caches) {
-		if (i915.enable_execlists)
-			ret = logical_ring_flush_all_caches(request);
-		else
-			ret = intel_engine_flush_all_caches(request);
+		ret = request->engine->emit_flush(request,
+						  0, I915_GEM_GPU_DOMAINS);
+
 		/* Not allowed to fail! */
 		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3a80d9d45f5c..b889680f7491 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -616,24 +616,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	uint32_t flush_domains;
-	int ret;
-
-	flush_domains = 0;
-	if (engine->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
-
-	ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
-	if (ret)
-		return ret;
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
 static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 				 struct list_head *vmas)
 {
@@ -664,7 +646,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */
-	return logical_ring_invalidate_all_caches(req);
+	return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
@@ -860,22 +842,6 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
 	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	int ret;
-
-	if (!engine->gpu_caches_dirty)
-		return 0;
-
-	ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
-	if (ret)
-		return ret;
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
 static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 				   struct drm_i915_gem_object *ctx_obj,
 				   struct intel_ring *ringbuf)
@@ -946,7 +912,6 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	int ret, i;
-	struct intel_engine_cs *engine = req->engine;
 	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = req->i915;
 	struct i915_workarounds *w = &dev_priv->workarounds;
@@ -954,8 +919,9 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (w->count == 0)
 		return 0;
 
-	engine->gpu_caches_dirty = true;
-	ret = logical_ring_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -972,8 +938,9 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 
 	intel_ring_advance(ring);
 
-	engine->gpu_caches_dirty = true;
-	ret = logical_ring_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index c88988a41898..7f01d2ddacfa 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -60,8 +60,6 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
 
-int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
-
 /* Logical Ring Contexts */
 
 /* One extra page is added before LRC for GuC as shared data */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 74a4a54e6ca5..e584b0f631f8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -696,8 +696,9 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	if (w->count == 0)
 		return 0;
 
-	req->engine->gpu_caches_dirty = true;
-	ret = intel_engine_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -714,8 +715,9 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 
 	intel_ring_advance(ring);
 
-	req->engine->gpu_caches_dirty = true;
-	ret = intel_engine_flush_all_caches(req);
+	ret = req->engine->emit_flush(req,
+				      I915_GEM_GPU_DOMAINS,
+				      I915_GEM_GPU_DOMAINS);
 	if (ret)
 		return ret;
 
@@ -2509,7 +2511,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->init_context = intel_rcs_ctx_init;
 		ring->add_request = gen6_add_request;
-		ring->flush = gen8_render_ring_flush;
+		ring->emit_flush = gen8_render_ring_flush;
 		ring->irq_enable = gen8_ring_enable_irq;
 		ring->irq_disable = gen8_ring_disable_irq;
 		ring->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT;
@@ -2523,9 +2525,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	} else if (INTEL_INFO(dev)->gen >= 6) {
 		ring->init_context = intel_rcs_ctx_init;
 		ring->add_request = gen6_add_request;
-		ring->flush = gen7_render_ring_flush;
+		ring->emit_flush = gen7_render_ring_flush;
 		if (INTEL_INFO(dev)->gen == 6)
-			ring->flush = gen6_render_ring_flush;
+			ring->emit_flush = gen6_render_ring_flush;
 		ring->irq_enable = gen6_ring_enable_irq;
 		ring->irq_disable = gen6_ring_disable_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
@@ -2553,7 +2555,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		}
 	} else if (IS_GEN5(dev)) {
 		ring->add_request = pc_render_add_request;
-		ring->flush = gen4_render_ring_flush;
+		ring->emit_flush = gen4_render_ring_flush;
 		ring->irq_enable = gen5_ring_enable_irq;
 		ring->irq_disable = gen5_ring_disable_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
@@ -2561,9 +2563,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	} else {
 		ring->add_request = i9xx_add_request;
 		if (INTEL_INFO(dev)->gen < 4)
-			ring->flush = gen2_render_ring_flush;
+			ring->emit_flush = gen2_render_ring_flush;
 		else
-			ring->flush = gen4_render_ring_flush;
+			ring->emit_flush = gen4_render_ring_flush;
 		if (IS_GEN2(dev)) {
 			ring->irq_enable = i8xx_ring_enable_irq;
 			ring->irq_disable = i8xx_ring_disable_irq;
@@ -2636,7 +2638,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 		/* gen6 bsd needs a special wa for tail updates */
 		if (IS_GEN6(dev))
 			ring->write_tail = gen6_bsd_ring_write_tail;
-		ring->flush = gen6_bsd_ring_flush;
+		ring->emit_flush = gen6_bsd_ring_flush;
 		ring->add_request = gen6_add_request;
 		ring->irq_seqno_barrier = gen6_seqno_barrier;
 		if (INTEL_INFO(dev)->gen >= 8) {
@@ -2674,7 +2676,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 		}
 	} else {
 		ring->mmio_base = BSD_RING_BASE;
-		ring->flush = bsd_ring_flush;
+		ring->emit_flush = bsd_ring_flush;
 		ring->add_request = i9xx_add_request;
 		if (IS_GEN5(dev)) {
 			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
@@ -2705,7 +2707,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 
 	ring->write_tail = ring_write_tail;
 	ring->mmio_base = GEN8_BSD2_RING_BASE;
-	ring->flush = gen6_bsd_ring_flush;
+	ring->emit_flush = gen6_bsd_ring_flush;
 	ring->add_request = gen6_add_request;
 	ring->irq_seqno_barrier = gen6_seqno_barrier;
 	ring->irq_enable_mask =
@@ -2734,7 +2736,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	ring->mmio_base = BLT_RING_BASE;
 	ring->write_tail = ring_write_tail;
-	ring->flush = gen6_ring_flush;
+	ring->emit_flush = gen6_ring_flush;
 	ring->add_request = gen6_add_request;
 	ring->irq_seqno_barrier = gen6_seqno_barrier;
 	if (INTEL_INFO(dev)->gen >= 8) {
@@ -2790,7 +2792,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 
 	ring->mmio_base = VEBOX_RING_BASE;
 	ring->write_tail = ring_write_tail;
-	ring->flush = gen6_ring_flush;
+	ring->emit_flush = gen6_ring_flush;
 	ring->add_request = gen6_add_request;
 	ring->irq_seqno_barrier = gen6_seqno_barrier;
 
@@ -2830,46 +2832,6 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 
 	return intel_init_engine(dev, ring);
 }
 
-int
-intel_engine_flush_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	int ret;
-
-	if (!engine->gpu_caches_dirty)
-		return 0;
-
-	ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS);
-	if (ret)
-		return ret;
-
-	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
-int
-intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-	uint32_t flush_domains;
-	int ret;
-
-	flush_domains = 0;
-	if (engine->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
-
-	ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
-	if (ret)
-		return ret;
-
-	trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
 void intel_engine_stop(struct intel_engine_cs *ring)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 15d067b9b8a2..fdeadae726b8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -215,9 +215,6 @@ struct intel_engine_cs {
 	void		(*write_tail)(struct intel_engine_cs *ring,
 				      u32 value);
-	int __must_check (*flush)(struct drm_i915_gem_request *req,
-				  u32 invalidate_domains,
-				  u32 flush_domains);
 	int		(*add_request)(struct drm_i915_gem_request *req);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
@@ -332,8 +329,6 @@ struct intel_engine_cs {
 	u32 last_submitted_seqno;
 	unsigned user_interrupts;
 
-	bool gpu_caches_dirty;
-
 	struct intel_context *default_context;
 	struct intel_context *last_context;
 
@@ -486,8 +481,6 @@ int intel_ring_space(struct intel_ring *ringbuf);
 
 int __must_check intel_engine_idle(struct intel_engine_cs *ring);
 void intel_engine_init_seqno(struct intel_engine_cs *ring, u32 seqno);
-int intel_engine_flush_all_caches(struct drm_i915_gem_request *req);
-int intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req);
 
 void intel_fini_pipe_control(struct intel_engine_cs *ring);
 int intel_init_pipe_control(struct intel_engine_cs *ring);
-- 
2.7.0.rc3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
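To make the resulting convention concrete, here is a minimal, self-contained sketch (not driver code) of what the patch converges on: callers invoke the engine's emit_flush() vfunc directly with explicit invalidate/flush domain masks, rather than setting gpu_caches_dirty and going through the removed *_flush_all_caches() helpers. The struct and function names below are illustrative stand-ins; only the (req, invalidate_domains, flush_domains) shape of the call and the use of a combined GPU-domains mask mirror the patch.

/*
 * Illustrative sketch only -- not i915 code.  All names here are
 * invented stand-ins; GPU_DOMAINS is a placeholder for the driver's
 * I915_GEM_GPU_DOMAINS mask.
 */
#include <stdio.h>

#define GPU_DOMAINS 0x3eu	/* placeholder for I915_GEM_GPU_DOMAINS */

struct sketch_request;

struct sketch_engine {
	/* the one remaining flush hook, shared by legacy and execlists */
	int (*emit_flush)(struct sketch_request *req,
			  unsigned int invalidate_domains,
			  unsigned int flush_domains);
};

struct sketch_request {
	struct sketch_engine *engine;
};

/* Ring space for the flush is preallocated, so emission cannot fail. */
static int sketch_emit_flush(struct sketch_request *req,
			     unsigned int invalidate_domains,
			     unsigned int flush_domains)
{
	(void)req;
	printf("emit_flush: invalidate=%#x flush=%#x\n",
	       invalidate_domains, flush_domains);
	return 0;
}

int main(void)
{
	struct sketch_engine engine = { .emit_flush = sketch_emit_flush };
	struct sketch_request req = { .engine = &engine };

	/* before running a batch: invalidate caches and TLBs */
	req.engine->emit_flush(&req, GPU_DOMAINS, 0);

	/* before the request's breadcrumb: flush outstanding writes */
	req.engine->emit_flush(&req, 0, GPU_DOMAINS);

	return 0;
}

Because the flush space is preallocated, the add-request path in the patch only WARNs on a non-zero return rather than propagating an error, which is why the per-engine dirty tracking can be dropped.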