Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Currently we allocate a scratch page for each engine, but since we only
> ever write into it for post-sync operations, it is not exposed to
> userspace nor do we care for coherency. As we then do not care about its
> contents, we can use one page for all, reducing our allocations and
> avoiding complications by not assuming per-engine isolation.
>
> For later use, it simplifies engine initialisation (by removing the
> allocation that required struct_mutex!) and means that we can always rely
> on there being a scratch page.
>
> v2: Check that we allocated a large enough scratch for the I830 w/a
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  7 ++++
>  drivers/gpu/drm/i915/i915_gem.c         | 50 ++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 42 ---------------------
>  drivers/gpu/drm/i915/intel_lrc.c        | 17 +++------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ---
>  7 files changed, 74 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 23a3dc6f3907..c5f01964f0fb 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1983,6 +1983,8 @@ struct drm_i915_private {
>  		struct delayed_work idle_work;
>
>  		ktime_t last_init_time;
> +
> +		struct i915_vma *scratch;
>  	} gt;
>
>  	/* perform PHY state sanity checks? */
> @@ -3713,4 +3715,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
>  	return I915_HWS_CSB_WRITE_INDEX;
>  }
>
> +static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
> +{
> +	return i915_ggtt_offset(i915->gt.scratch);
> +}
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 35ecfea4e903..d36a9755ad91 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5498,6 +5498,44 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>  	goto out_ctx;
>  }
>
> +static int
> +i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
> +{
> +	struct drm_i915_gem_object *obj;
> +	struct i915_vma *vma;
> +	int ret;
> +
> +	obj = i915_gem_object_create_stolen(i915, size);
> +	if (!obj)
> +		obj = i915_gem_object_create_internal(i915, size);
> +	if (IS_ERR(obj)) {
> +		DRM_ERROR("Failed to allocate scratch page\n");
> +		return PTR_ERR(obj);
> +	}
> +
> +	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
> +		goto err_unref;
> +	}
> +
> +	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> +	if (ret)
> +		goto err_unref;
> +
> +	i915->gt.scratch = vma;
> +	return 0;
> +
> +err_unref:
> +	i915_gem_object_put(obj);
> +	return ret;
> +}
> +
> +static void i915_gem_fini_scratch(struct drm_i915_private *i915)
> +{
> +	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
> +}
> +
>  int i915_gem_init(struct drm_i915_private *dev_priv)
>  {
>  	int ret;
> @@ -5544,12 +5582,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>  		goto err_unlock;
>  	}
>
> -	ret = i915_gem_contexts_init(dev_priv);
> +	ret = i915_gem_init_scratch(dev_priv,
> +				    IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
>  	if (ret) {
>  		GEM_BUG_ON(ret == -EIO);
>  		goto err_ggtt;
>  	}
>
> +	ret = i915_gem_contexts_init(dev_priv);
> +	if (ret) {
> +		GEM_BUG_ON(ret == -EIO);
> +		goto err_scratch;
> +	}
> +
>  	ret = intel_engines_init(dev_priv);
>  	if (ret) {
>  		GEM_BUG_ON(ret == -EIO);
> @@ -5622,6 +5667,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>  err_context:
>  	if (ret != -EIO)
>  		i915_gem_contexts_fini(dev_priv);
> +err_scratch:
> +	i915_gem_fini_scratch(dev_priv);
>  err_ggtt:
>  err_unlock:
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> @@ -5673,6 +5720,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
>  	intel_uc_fini(dev_priv);
>  	i915_gem_cleanup_engines(dev_priv);
>  	i915_gem_contexts_fini(dev_priv);
> +	i915_gem_fini_scratch(dev_priv);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>
>  	intel_wa_list_free(&dev_priv->gt_wa_list);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a6885a59568b..07465123c166 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
>  			if (HAS_BROKEN_CS_TLB(i915))
>  				ee->wa_batchbuffer =
>  					i915_error_object_create(i915,
> -								 engine->scratch);
> +								 i915->gt.scratch);
>  			request_record_user_bo(request, ee);
>
>  			ee->ctx =
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 6b427bc52f78..af2873403009 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
>  	intel_engine_init_cmd_parser(engine);
>  }
>
> -int intel_engine_create_scratch(struct intel_engine_cs *engine,
> -				unsigned int size)
> -{
> -	struct drm_i915_gem_object *obj;
> -	struct i915_vma *vma;
> -	int ret;
> -
> -	WARN_ON(engine->scratch);
> -
> -	obj = i915_gem_object_create_stolen(engine->i915, size);
> -	if (!obj)
> -		obj = i915_gem_object_create_internal(engine->i915, size);
> -	if (IS_ERR(obj)) {
> -		DRM_ERROR("Failed to allocate scratch page\n");
> -		return PTR_ERR(obj);
> -	}
> -
> -	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
> -	if (IS_ERR(vma)) {
> -		ret = PTR_ERR(vma);
> -		goto err_unref;
> -	}
> -
> -	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> -	if (ret)
> -		goto err_unref;
> -
> -	engine->scratch = vma;
> -	return 0;
> -
> -err_unref:
> -	i915_gem_object_put(obj);
> -	return ret;
> -}
> -
> -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
> -{
> -	i915_vma_unpin_and_release(&engine->scratch, 0);
> -}
> -
>  static void cleanup_status_page(struct intel_engine_cs *engine)
>  {
>  	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
> @@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *i915 = engine->i915;
>
> -	intel_engine_cleanup_scratch(engine);
> -
>  	cleanup_status_page(engine);
>
>  	intel_engine_fini_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 87227fd9ae5f..d7fa301b5ec7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1288,9 +1288,10 @@ static int execlists_request_alloc(struct i915_request *request)
>  static u32 *
>  gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
>  {
> +	/* NB no one else is allowed to scribble over scratch + 256! */
>  	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
>  	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> -	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
> +	*batch++ = i915_scratch_offset(engine->i915) + 256;
>  	*batch++ = 0;
>
>  	*batch++ = MI_LOAD_REGISTER_IMM(1);
> @@ -1304,7 +1305,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
>
>  	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
>  	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> -	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
> +	*batch++ = i915_scratch_offset(engine->i915) + 256;
>  	*batch++ = 0;
>
>  	return batch;
> @@ -1341,7 +1342,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
>  				       PIPE_CONTROL_GLOBAL_GTT_IVB |
>  				       PIPE_CONTROL_CS_STALL |
>  				       PIPE_CONTROL_QW_WRITE,
> -				       i915_ggtt_offset(engine->scratch) +
> +				       i915_scratch_offset(engine->i915) +
>  				       2 * CACHELINE_BYTES);
>
>  	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> @@ -1973,7 +1974,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
>  {
>  	struct intel_engine_cs *engine = request->engine;
>  	u32 scratch_addr =
> -		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
> +		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
>  	bool vf_flush_wa = false, dc_flush_wa = false;
>  	u32 *cs, flags = 0;
>  	int len;
> @@ -2292,10 +2293,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>  	if (ret)
>  		return ret;
>
> -	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
> -	if (ret)
> -		goto err_cleanup_common;
> -
>  	ret = intel_init_workaround_bb(engine);
>  	if (ret) {
>  		/*
> @@ -2311,10 +2308,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>  	intel_engine_init_workarounds(engine);
>
>  	return 0;
> -
> -err_cleanup_common:
> -	intel_engine_cleanup_common(engine);
> -	return ret;
>  }
>
>  int logical_xcs_ring_init(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7f88df5bff09..c5eb26a7ee79 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>  		 */
>  		if (mode & EMIT_INVALIDATE) {
>  			*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
> -			*cs++ = i915_ggtt_offset(rq->engine->scratch) |
> -				PIPE_CONTROL_GLOBAL_GTT;
> +			*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
>  			*cs++ = 0;
>  			*cs++ = 0;
>
> @@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>  				*cs++ = MI_FLUSH;
>
>  			*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
> -			*cs++ = i915_ggtt_offset(rq->engine->scratch) |
> -				PIPE_CONTROL_GLOBAL_GTT;
> +			*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
>  			*cs++ = 0;
>  			*cs++ = 0;
>  		}
> @@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>  static int
>  intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
>  {
> -	u32 scratch_addr =
> -		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs;
>
>  	cs = intel_ring_begin(rq, 6);
> @@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
>  static int
>  gen6_render_ring_flush(struct i915_request *rq, u32 mode)
>  {
> -	u32 scratch_addr =
> -		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs, flags = 0;
>  	int ret;
>
> @@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
>  static int
>  gen7_render_ring_flush(struct i915_request *rq, u32 mode)
>  {
> -	u32 scratch_addr =
> -		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> +	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs, flags = 0;
>
>  	/*
> @@ -994,7 +989,7 @@ i965_emit_bb_start(struct i915_request *rq,
>  }
>
>  /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
> -#define I830_BATCH_LIMIT (256*1024)
> +#define I830_BATCH_LIMIT SZ_256K
>  #define I830_TLB_ENTRIES (2)
>  #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
>  static int
> @@ -1002,7 +997,9 @@ i830_emit_bb_start(struct i915_request *rq,
>  		   u64 offset, u32 len,
>  		   unsigned int dispatch_flags)
>  {
> -	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
> +	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
> +
> +	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
>
>  	cs = intel_ring_begin(rq, 6);
>  	if (IS_ERR(cs))
> @@ -1459,7 +1456,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>  {
>  	struct i915_timeline *timeline;
>  	struct intel_ring *ring;
> -	unsigned int size;
>  	int err;
>
>  	intel_engine_setup_common(engine);
> @@ -1484,21 +1480,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>  	GEM_BUG_ON(engine->buffer);
>  	engine->buffer = ring;
>
> -	size = PAGE_SIZE;
> -	if (HAS_BROKEN_CS_TLB(engine->i915))
> -		size = I830_WA_SIZE;
> -	err = intel_engine_create_scratch(engine, size);
> -	if (err)
> -		goto err_unpin;
> -
>  	err = intel_engine_init_common(engine);
>  	if (err)
> -		goto err_scratch;
> +		goto err_unpin;
>
>  	return 0;
>
> -err_scratch:
> -	intel_engine_cleanup_scratch(engine);
>  err_unpin:
>  	intel_ring_unpin(ring);
>  err_ring:
> @@ -1572,7 +1559,7 @@ static int flush_pd_dir(struct i915_request *rq)
>  	/* Stall until the page table load is complete */
>  	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>  	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
> -	*cs++ = i915_ggtt_offset(engine->scratch);
> +	*cs++ = i915_scratch_offset(rq->i915);
>  	*cs++ = MI_NOOP;
>
>  	intel_ring_advance(rq, cs);
> @@ -1681,7 +1668,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  		/* Insert a delay before the next switch! */
>  		*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>  		*cs++ = i915_mmio_reg_offset(last_reg);
> -		*cs++ = i915_ggtt_offset(engine->scratch);
> +		*cs++ = i915_scratch_offset(rq->i915);
>  		*cs++ = MI_NOOP;
>  	}
>  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 927bb21a2b0b..72edaa7ff411 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -439,7 +439,6 @@ struct intel_engine_cs {
>  	struct i915_wa_list ctx_wa_list;
>  	struct i915_wa_list wa_list;
>  	struct i915_wa_list whitelist;
> -	struct i915_vma *scratch;
>
>  	u32 irq_keep_mask; /* always keep these interrupts */
>  	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
> @@ -896,10 +895,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine);
>  int intel_engine_init_common(struct intel_engine_cs *engine);
>  void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>
> -int intel_engine_create_scratch(struct intel_engine_cs *engine,
> -				unsigned int size);
> -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
> -
>  int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
>  int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
>  int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
> --
> 2.20.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
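For readers outside the driver, the pattern the patch adopts can be sketched in plain userspace C: one scratch allocation owned by the device, made once at init and sized for the worst case (the gen2/I830 workaround), with every consumer deriving only an offset/pointer from it rather than owning its own buffer. This is a minimal illustrative sketch, not driver code; the names (toy_gt, toy_scratch_init, toy_scratch_offset, toy_scratch_fini) are hypothetical and merely mirror the i915_gem_init_scratch()/i915_scratch_offset() split introduced above.

/* Toy model: a single shared scratch area, allocated once, never read back. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE    4096u
#define TOY_I830_WA_SIZE (256u * 1024u)	/* gen2 needs a larger scratch */

struct toy_gt {
	unsigned char *scratch;		/* one allocation shared by all engines */
	unsigned int scratch_size;
};

static int toy_scratch_init(struct toy_gt *gt, unsigned int size)
{
	gt->scratch = malloc(size);
	if (!gt->scratch)
		return -1;
	gt->scratch_size = size;
	return 0;
}

/* Every "engine" resolves the same base; no per-engine bookkeeping. */
static unsigned char *toy_scratch_offset(struct toy_gt *gt)
{
	return gt->scratch;
}

static void toy_scratch_fini(struct toy_gt *gt)
{
	free(gt->scratch);
	gt->scratch = NULL;
}

int main(void)
{
	struct toy_gt gt;
	int is_gen2 = 0;

	/* Size the single scratch up front, like the v2 I830 w/a check. */
	if (toy_scratch_init(&gt, is_gen2 ? TOY_I830_WA_SIZE : TOY_PAGE_SIZE))
		return 1;

	/* A post-sync write lands in scratch; its contents are never examined. */
	toy_scratch_offset(&gt)[0] = 0xaa;
	assert(gt.scratch_size >= TOY_PAGE_SIZE);

	toy_scratch_fini(&gt);
	printf("shared scratch ok\n");
	return 0;
}

Because the scratch contents are write-only garbage, sharing one allocation is sufficient, which is what lets the patch drop the per-engine allocation (and the struct_mutex dependency during engine init) entirely.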