On 19/04/16 07:49, Chris Wilson wrote:
Since we do the l3-remap on context switch, and proceed to do a context switch immediately after manually doing the l3-remap, we can remove the redundant manual call.
Looks like it should say it is removing the l3-remap on driver load, not "manual".
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 39 +-------------------------------- drivers/gpu/drm/i915/i915_gem_context.c | 31 +++++++++++++++++++++++++- 3 files changed, 31 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 368fd7090189..4c55f480f60b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2983,7 +2983,6 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev); int i915_gem_init_engines(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9d7dfc8f80c..fd9a36badb45 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4077,35 +4077,6 @@ err: return ret; } -int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) -{ - struct intel_engine_cs *engine = req->engine; - u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice];
Wrong base.
- int i, ret; - - if (!HAS_L3_DPF(req) || !remap_info) - return 0; - - ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); - if (ret) - return ret; - - /* - * Note: We do not worry about the concurrent register cacheline hang - * here because no other code should access these registers other than - * at initialization time. - */ - for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); - intel_ring_emit(engine, remap_info[i]); - } - - intel_ring_advance(engine); - - return ret; -} - void i915_gem_init_swizzling(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -4210,7 +4181,7 @@ i915_gem_init_hw(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; - int ret, j; + int ret; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -4284,14 +4255,6 @@ i915_gem_init_hw(struct drm_device *dev) break; } - if (engine->id == RCS) { - for (j = 0; j < NUM_L3_SLICES(dev); j++) { - ret = i915_gem_l3_remap(req, j); - if (ret) - goto err_request; - } - } - ret = i915_ppgtt_init_ring(req); if (ret) goto err_request; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index db53d811370d..6870556a180b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -691,6 +691,35 @@ needs_pd_load_post(struct intel_context *to, u32 hw_flags) return false; } +static int remap_l3(struct drm_i915_gem_request *req, int slice) +{ + u32 *remap_info = to_i915(req)->l3_parity.remap_info[slice]; + struct intel_engine_cs *engine = req->engine; + int i, ret; + + if (!remap_info) + return 0;
HAS_L3_DPF check is gone. Ok it looks correct from the call site, but best would be not to change it when moving around.
+ + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
More changes - should you perhaps add a patch which simplifies l3 remap to use since lri ahead of this one?
+ if (ret) + return ret; + + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ + intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { + intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); + intel_ring_emit(engine, remap_info[i]); + } + intel_ring_emit(engine, MI_NOOP); + intel_ring_advance(engine); + + return 0; +} + static int do_rcs_switch(struct drm_i915_gem_request *req) { struct intel_context *to = req->ctx; @@ -810,7 +839,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (!(to->remap_slice & (1<<i))) continue; - ret = i915_gem_l3_remap(req, i); + ret = remap_l3(req, i); if (ret) return ret;
Otherwise looks safe, although I am not sure why it belongs in this series. Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx