Currently we allocate a scratch page for each engine, but since we only ever write into it for post-sync operations, it is not exposed to userspace nor do we care for coherency. As we then do not care about its contents, we can use one page for all, reducing our allocations and avoid complications by not assuming per-engine isolation. For later use, it simplifies engine initialisation (by removing the allocation that required struct_mutex!) and means that we can always rely on there being a scratch page. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 7 ++++ drivers/gpu/drm/i915/i915_gem.c | 50 ++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 42 --------------------- drivers/gpu/drm/i915/intel_lrc.c | 17 +++------ drivers/gpu/drm/i915/intel_ringbuffer.c | 33 +++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 --- 7 files changed, 71 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 43ac6873a2bb..6cd576424ec0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1995,6 +1995,8 @@ struct drm_i915_private { struct delayed_work idle_work; ktime_t last_init_time; + + struct i915_vma *scratch; } gt; /* perform PHY state sanity checks? */ @@ -3723,4 +3725,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) return I915_HWS_CSB_WRITE_INDEX; } +static inline u32 i915_scratch_offset(const struct drm_i915_private *i915) +{ + return i915_ggtt_offset(i915->gt.scratch); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0eaa8a6f0423..a6f798333e24 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5497,6 +5497,44 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) goto out_ctx; } +static int +i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + obj = i915_gem_object_create_stolen(i915, size); + if (!obj) + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + i915->gt.scratch = vma; + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +static void i915_gem_fini_scratch(struct drm_i915_private *i915) +{ + i915_vma_unpin_and_release(&i915->gt.scratch, 0); +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -5543,12 +5581,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_contexts_init(dev_priv); + ret = i915_gem_init_scratch(dev_priv, + IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_ggtt; } + ret = i915_gem_contexts_init(dev_priv); + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_scratch; + } + ret = intel_engines_init(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); @@ -5621,6 +5666,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_context: if (ret != -EIO) i915_gem_contexts_fini(dev_priv); +err_scratch: + i915_gem_fini_scratch(dev_priv); err_ggtt: err_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -5672,6 +5719,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); + i915_gem_fini_scratch(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); intel_cleanup_gt_powersave(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a6885a59568b..07465123c166 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error) if (HAS_BROKEN_CS_TLB(i915)) ee->wa_batchbuffer = i915_error_object_create(i915, - engine->scratch); + i915->gt.scratch); request_record_user_bo(request, ee); ee->ctx = diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 759c0fd58f8c..2390985384d6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_cmd_parser(engine); } -int intel_engine_create_scratch(struct intel_engine_cs *engine, - unsigned int size) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - WARN_ON(engine->scratch); - - obj = i915_gem_object_create_stolen(engine->i915, size); - if (!obj) - obj = i915_gem_object_create_internal(engine->i915, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); - return PTR_ERR(obj); - } - - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_unref; - } - - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_unref; - - engine->scratch = vma; - return 0; - -err_unref: - i915_gem_object_put(obj); - return ret; -} - -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) -{ - i915_vma_unpin_and_release(&engine->scratch, 0); -} - static void cleanup_status_page(struct intel_engine_cs *engine) { if (HWS_NEEDS_PHYSICAL(engine->i915)) { @@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - intel_engine_cleanup_scratch(engine); - cleanup_status_page(engine); intel_engine_fini_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b5511a054f30..de070dca4033 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1286,9 +1286,10 @@ static int execlists_request_alloc(struct i915_request *request) static u32 * gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { + /* NB no one else is allowed to scribble over scratch + 256! */ *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = i915_scratch_offset(engine->i915) + 256; *batch++ = 0; *batch++ = MI_LOAD_REGISTER_IMM(1); @@ -1302,7 +1303,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = i915_scratch_offset(engine->i915) + 256; *batch++ = 0; return batch; @@ -1339,7 +1340,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - i915_ggtt_offset(engine->scratch) + + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1969,7 +1970,7 @@ static int gen8_emit_flush_render(struct i915_request *request, { struct intel_engine_cs *engine = request->engine; u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2306,10 +2307,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_engine_create_scratch(engine, PAGE_SIZE); - if (ret) - goto err_cleanup_common; - ret = intel_init_workaround_bb(engine); if (ret) { /* @@ -2322,10 +2319,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine) } return 0; - -err_cleanup_common: - intel_engine_cleanup_common(engine); - return ret; } int logical_xcs_ring_init(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9875ee474b83..1b7141a171b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) */ if (mode & EMIT_INVALIDATE) { *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_ggtt_offset(rq->engine->scratch) | - PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; @@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) *cs++ = MI_FLUSH; *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = i915_ggtt_offset(rq->engine->scratch) | - PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; *cs++ = 0; } @@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct i915_request *rq) { - u32 scratch_addr = - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs; cs = intel_ring_begin(rq, 6); @@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq) static int gen6_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; int ret; @@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq) static int gen7_render_ring_flush(struct i915_request *rq, u32 mode) { - u32 scratch_addr = - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; /* @@ -1004,7 +999,7 @@ i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); + u32 *cs, cs_offset = i915_scratch_offset(rq->i915); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1461,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct i915_timeline *timeline; struct intel_ring *ring; - unsigned int size; int err; intel_engine_setup_common(engine); @@ -1486,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) GEM_BUG_ON(engine->buffer); engine->buffer = ring; - size = PAGE_SIZE; - if (HAS_BROKEN_CS_TLB(engine->i915)) - size = I830_WA_SIZE; - err = intel_engine_create_scratch(engine, size); - if (err) - goto err_unpin; - err = intel_engine_init_common(engine); if (err) - goto err_scratch; + goto err_unpin; return 0; -err_scratch: - intel_engine_cleanup_scratch(engine); err_unpin: intel_ring_unpin(ring); err_ring: @@ -1574,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq) /* Stall until the page table load is complete */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); - *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = i915_scratch_offset(rq->i915); *cs++ = MI_NOOP; intel_ring_advance(rq, cs); @@ -1683,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = i915_scratch_offset(rq->i915); *cs++ = MI_NOOP; } *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8a2270b209b0..970fb5c05c36 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -451,7 +451,6 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; - struct i915_vma *scratch; u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ @@ -908,10 +907,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); -int intel_engine_create_scratch(struct intel_engine_cs *engine, - unsigned int size); -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine); - int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); -- 2.20.0.rc1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx