On Tue, Apr 12, 2016 at 09:03:08PM +0100, Chris Wilson wrote:
> After mi_set_context() succeeds, we need to update the state of the
> engine's last_context. This ensures that we hold a pin on the context
> whilst the hardware may write to it. However, since we didn't complete
> the post-switch setup of the context, we need to force the subsequent
> use of the same context to complete the setup (which means updating
> should_skip_switch()).
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>

Specializing do_rcs_switch makes sense, but it shouldn't be intermingled
with the bugfix. Assuming I'm reading things correctly, the real bugfix
is to add the check for hw_ctx.initialized to should_skip_switch()? If
so, please split this into 2 patches: the first to add just that check,
the 2nd to do the RCS specialization.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_context.c | 215 ++++++++++++++++----------------
>  1 file changed, 109 insertions(+), 106 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index e5ad7b21e356..361959b1d53a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -609,50 +609,40 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
>  	return ret;
>  }
>
> -static inline bool should_skip_switch(struct intel_engine_cs *engine,
> -				      struct intel_context *from,
> +static inline bool should_skip_switch(struct intel_context *from,
>  				      struct intel_context *to)
>  {
>  	if (to->remap_slice)
>  		return false;
>
> -	if (to->ppgtt && from == to &&
> -	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
> -		return true;
> +	if (!to->legacy_hw_ctx.initialized)
> +		return false;
>
> -	return false;
> +	if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
> +		return false;
> +
> +	return to == from;
>  }
>
>  static bool
> -needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
> +needs_pd_load_pre(struct intel_context *to)
>  {
> -	struct drm_i915_private *dev_priv = engine->dev->dev_private;
> -
>  	if (!to->ppgtt)
>  		return false;
>
> -	if (INTEL_INFO(engine->dev)->gen < 8)
> -		return true;
> -
> -	if (engine != &dev_priv->engine[RCS])
> +	if (INTEL_INFO(to->i915)->gen < 8)
>  		return true;
>
>  	return false;
>  }
>
>  static bool
> -needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
> -		   u32 hw_flags)
> +needs_pd_load_post(struct intel_context *to, u32 hw_flags)
>  {
> -	struct drm_i915_private *dev_priv = engine->dev->dev_private;
> -
>  	if (!to->ppgtt)
>  		return false;
>
> -	if (!IS_GEN8(engine->dev))
> -		return false;
> -
> -	if (engine != &dev_priv->engine[RCS])
> +	if (!IS_GEN8(to->i915))
>  		return false;
>
>  	if (hw_flags & MI_RESTORE_INHIBIT)
> @@ -661,60 +651,33 @@ needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
>  	return false;
>  }
>
> -static int do_switch(struct drm_i915_gem_request *req)
> +static int do_rcs_switch(struct drm_i915_gem_request *req)
>  {
>  	struct intel_context *to = req->ctx;
>  	struct intel_engine_cs *engine = req->engine;
> -	struct drm_i915_private *dev_priv = req->i915;
> -	struct intel_context *from = engine->last_context;
> -	u32 hw_flags = 0;
> -	bool uninitialized = false;
> +	struct intel_context *from;
> +	u32 hw_flags;
>  	int ret, i;
>
> -	if (from != NULL && engine == &dev_priv->engine[RCS]) {
> -		BUG_ON(from->legacy_hw_ctx.rcs_state == NULL);
> -		BUG_ON(!i915_gem_obj_is_pinned(from->legacy_hw_ctx.rcs_state));
> -	}
> -
> -	if (should_skip_switch(engine, from, to))
> +	if (should_skip_switch(engine->last_context, to))
>  		return 0;
>
>  	/* Trying to pin first makes error handling easier. */
> -	if (engine == &dev_priv->engine[RCS]) {
> -		ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
> -					    get_context_alignment(engine->dev),
> -					    0);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
> +				    get_context_alignment(engine->dev),
> +				    0);
> +	if (ret)
> +		return ret;
>
>  	/*
>  	 * Pin can switch back to the default context if we end up calling into
>  	 * evict_everything - as a last ditch gtt defrag effort that also
>  	 * switches to the default context. Hence we need to reload from here.
> +	 *
> +	 * XXX: Doing so is painfully broken!
>  	 */
>  	from = engine->last_context;
>
> -	if (needs_pd_load_pre(engine, to)) {
> -		/* Older GENs and non render rings still want the load first,
> -		 * "PP_DCLV followed by PP_DIR_BASE register through Load
> -		 * Register Immediate commands in Ring Buffer before submitting
> -		 * a context."*/
> -		trace_switch_mm(engine, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> -		if (ret)
> -			goto unpin_out;
> -
> -		/* Doing a PD load always reloads the page dirs */
> -		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> -	}
> -
> -	if (engine != &dev_priv->engine[RCS]) {
> -		if (from)
> -			i915_gem_context_unreference(from);
> -		goto done;
> -	}
> -
>  	/*
>  	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
>  	 * that thanks to write = false in this call and us not setting any gpu
> @@ -727,55 +690,46 @@ static int do_switch(struct drm_i915_gem_request *req)
>  	if (ret)
>  		goto unpin_out;
>
> -	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
> -		hw_flags |= MI_RESTORE_INHIBIT;
> +	if (needs_pd_load_pre(to)) {
> +		/* Older GENs and non render rings still want the load first,
> +		 * "PP_DCLV followed by PP_DIR_BASE register through Load
> +		 * Register Immediate commands in Ring Buffer before submitting
> +		 * a context."*/
> +		trace_switch_mm(engine, to);
> +		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +		if (ret)
> +			goto unpin_out;
> +
> +		/* Doing a PD load always reloads the page dirs */
> +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
> +	}
> +
> +	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>  		/* NB: If we inhibit the restore, the context is not allowed to
>  		 * die because future work may end up depending on valid address
>  		 * space. This means we must enforce that a page table load
>  		 * occur when this occurs. */
> -	} else if (to->ppgtt &&
> -		   (intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) {
> -		hw_flags |= MI_FORCE_RESTORE;
> -		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> -	}
> +		hw_flags = MI_RESTORE_INHIBIT;
> +	else if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
> +		hw_flags = MI_FORCE_RESTORE;
> +	else
> +		hw_flags = 0;
>
>  	/* We should never emit switch_mm more than once */
> -	WARN_ON(needs_pd_load_pre(engine, to) &&
> -		needs_pd_load_post(engine, to, hw_flags));
> +	WARN_ON(needs_pd_load_pre(to) && needs_pd_load_post(to, hw_flags));
>
> -	ret = mi_set_context(req, hw_flags);
> -	if (ret)
> -		goto unpin_out;
> -
> -	/* GEN8 does *not* require an explicit reload if the PDPs have been
> -	 * setup, and we do not wish to move them.
> -	 */
> -	if (needs_pd_load_post(engine, to, hw_flags)) {
> -		trace_switch_mm(engine, to);
> -		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> -		/* The hardware context switch is emitted, but we haven't
> -		 * actually changed the state - so it's probably safe to bail
> -		 * here. Still, let the user know something dangerous has
> -		 * happened.
> -		 */
> +	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
> +		ret = mi_set_context(req, hw_flags);
>  		if (ret) {
> -			DRM_ERROR("Failed to change address space on context switch\n");
> +			/* Force the reload of this and the previous mm */
> +			if (needs_pd_load_pre(to))
> +				to->ppgtt->pd_dirty_rings |= 1 << RCS;
> +			if (from && needs_pd_load_pre(from))
> +				from->ppgtt->pd_dirty_rings |= 1 << RCS;
>  			goto unpin_out;
>  		}
>  	}
>
> -	for (i = 0; i < MAX_L3_SLICES; i++) {
> -		if (!(to->remap_slice & (1<<i)))
> -			continue;
> -
> -		ret = i915_gem_l3_remap(req, i);
> -		/* If it failed, try again next round */
> -		if (ret)
> -			DRM_DEBUG_DRIVER("L3 remapping failed\n");
> -		else
> -			to->remap_slice &= ~(1<<i);
> -	}
> -
>  	/* The backing object for the context is done after switching to the
>  	 * *next* context. Therefore we cannot retire the previous context until
>  	 * the next context has already started running. In fact, the below code
> @@ -798,27 +752,52 @@ static int do_switch(struct drm_i915_gem_request *req)
>  		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
>  		i915_gem_context_unreference(from);
>  	}
> -
> -	uninitialized = !to->legacy_hw_ctx.initialized;
> -	to->legacy_hw_ctx.initialized = true;
> -
> -done:
>  	i915_gem_context_reference(to);
>  	engine->last_context = to;
>
> -	if (uninitialized) {
> +	/* GEN8 does *not* require an explicit reload if the PDPs have been
> +	 * setup, and we do not wish to move them.
> +	 */
> +	if (needs_pd_load_post(to, hw_flags)) {
> +		trace_switch_mm(engine, to);
> +		ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +		/* The hardware context switch is emitted, but we haven't
> +		 * actually changed the state - so it's probably safe to bail
> +		 * here. Still, let the user know something dangerous has
> +		 * happened.
> +		 */
> +		if (ret)
> +			return ret;
> +
> +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
> +	}
> +	if (hw_flags & MI_FORCE_RESTORE)
> +		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
> +
> +	for (i = 0; i < MAX_L3_SLICES; i++) {
> +		if (!(to->remap_slice & (1<<i)))
> +			continue;
> +
> +		ret = i915_gem_l3_remap(req, i);
> +		if (ret)
> +			return ret;
> +
> +		to->remap_slice &= ~(1<<i);
> +	}
> +
> +	if (!to->legacy_hw_ctx.initialized) {
>  		if (engine->init_context) {
>  			ret = engine->init_context(req);
>  			if (ret)
> -				DRM_ERROR("ring init context: %d\n", ret);
> +				return ret;
>  		}
> +		to->legacy_hw_ctx.initialized = true;
>  	}
>
>  	return 0;
>
>  unpin_out:
> -	if (engine->id == RCS)
> -		i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
> +	i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
>  	return ret;
>  }
>
> @@ -853,7 +832,31 @@ int i915_switch_context(struct drm_i915_gem_request *req)
>  		return 0;
>  	}
>
> -	return do_switch(req);
> +	if (engine->id != RCS) {
> +		struct intel_context *from = engine->last_context;
> +		struct intel_context *to = req->ctx;
> +
> +		if (to->ppgtt &&
> +		    (from != to ||
> +		     intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) {
> +			int ret;
> +
> +			trace_switch_mm(engine, to);
> +			ret = to->ppgtt->switch_mm(to->ppgtt, req);
> +			if (ret)
> +				return ret;
> +
> +			to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
> +		}
> +
> +		if (from)
> +			i915_gem_context_unreference(from);
> +		i915_gem_context_reference(to);
> +		engine->last_context = to;
> +		return 0;
> +	}
> +
> +	return do_rcs_switch(req);
>  }
>
>  static bool contexts_enabled(struct drm_device *dev)
> --
> 2.8.0.rc3
>

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
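
[For illustration, the minimal first patch of the split Daniel asks for
could be as small as the sketch below: only the legacy_hw_ctx.initialized
check is added to the existing should_skip_switch(), while the engine
parameter and the RCS specialization are left untouched for the second
patch. This is an untested sketch that reuses the function and field
names from the diff above, not an actual follow-up patch.]

static inline bool should_skip_switch(struct intel_engine_cs *engine,
                                      struct intel_context *from,
                                      struct intel_context *to)
{
        if (to->remap_slice)
                return false;

        /* If the previous switch never completed the post-switch setup
         * (L3 remap, engine->init_context()), do not skip: force another
         * full switch so that the setup is rerun. */
        if (!to->legacy_hw_ctx.initialized)
                return false;

        if (to->ppgtt && from == to &&
            !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
                return true;

        return false;
}

[For the check to have any effect, that first patch would presumably
also need to defer the to->legacy_hw_ctx.initialized = true assignment
in do_switch() until the L3 remap and engine->init_context() steps have
succeeded, as the combined diff above already does.]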