hw_flags) > return ret; > } > > -static inline bool skip_rcs_switch(struct intel_engine_cs *engine, > - struct intel_context *from, > +static inline bool skip_rcs_switch(struct intel_context *from, > struct intel_context *to) > { > if (to->remap_slice) > return false; > > - if (to->ppgtt && from == to && > - !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) > - return true; > + if (!to->legacy_hw_ctx.initialized) > + return false; > > - return false; > + if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS)) > + return false; > + > + return to == from; > } > > static bool > @@ -643,18 +644,12 @@ needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to) > } > > static bool > -needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to, > - u32 hw_flags) > +needs_pd_load_post(struct intel_context *to, u32 hw_flags) > { > - struct drm_i915_private *dev_priv = engine->dev->dev_private; > - > if (!to->ppgtt) > return false; > > - if (!IS_GEN8(engine->dev)) > - return false; > - > - if (engine != &dev_priv->engine[RCS]) > + if (!IS_GEN8(to->i915)) > return false; > > if (hw_flags & MI_RESTORE_INHIBIT) Above hunk might be better in the previous patch, but meh. > @@ -667,12 +662,11 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) > { > struct intel_context *to = req->ctx; > struct intel_engine_cs *engine = req->engine; > - struct intel_context *from = engine->last_context; > - u32 hw_flags = 0; > - bool uninitialized = false; > + struct intel_context *from; > + u32 hw_flags; > int ret, i; > > - if (skip_rcs_switch(engine, from, to)) > + if (skip_rcs_switch(engine->last_context, to)) > return 0; > > /* Trying to pin first makes error handling easier. */ > @@ -686,9 +680,23 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) > * Pin can switch back to the default context if we end up calling into > * evict_everything - as a last ditch gtt defrag effort that also > * switches to the default context. 
Hence we need to reload from here. > + * > + * XXX: Doing so is painfully broken! Why the XXX addition here? I thought the point of this patch is to fix this ... > */ > from = engine->last_context; > > + /* > + * Clear this page out of any CPU caches for coherent swap-in/out. Note > + * that thanks to write = false in this call and us not setting any gpu > + * write domains when putting a context object onto the active list > + * (when switching away from it), this won't block. > + * > + * XXX: We need a real interface to do this instead of trickery. > + */ > + ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false); > + if (ret) > + goto unpin_out; > + > if (needs_pd_load_pre(engine, to)) { > /* Older GENs and non render rings still want the load first, > * "PP_DCLV followed by PP_DIR_BASE register through Load > @@ -700,70 +708,36 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) > goto unpin_out; > > /* Doing a PD load always reloads the page dirs */ > - to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); > + to->ppgtt->pd_dirty_rings &= ~(1 << RCS); Superfluous change for the imo worse. > } > > - /* > - * Clear this page out of any CPU caches for coherent swap-in/out. Note > - * that thanks to write = false in this call and us not setting any gpu > - * write domains when putting a context object onto the active list > - * (when switching away from it), this won't block. > - * > - * XXX: We need a real interface to do this instead of trickery. > - */ > - ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false); > - if (ret) > - goto unpin_out; > - > - if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) { > - hw_flags |= MI_RESTORE_INHIBIT; > + if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) > /* NB: If we inhibit the restore, the context is not allowed to > * die because future work may end up depending on valid address > * space. 
This means we must enforce that a page table load > * occur when this occurs. */ > - } else if (to->ppgtt && > - (intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) { > - hw_flags |= MI_FORCE_RESTORE; > - to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); > - } > + hw_flags = MI_RESTORE_INHIBIT; > + else if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS)) > + hw_flags = MI_FORCE_RESTORE; > + else > + hw_flags = 0; > > /* We should never emit switch_mm more than once */ > WARN_ON(needs_pd_load_pre(engine, to) && > - needs_pd_load_post(engine, to, hw_flags)); > - > - ret = mi_set_context(req, hw_flags); > - if (ret) > - goto unpin_out; > + needs_pd_load_post(to, hw_flags)); > > - /* GEN8 does *not* require an explicit reload if the PDPs have been > - * setup, and we do not wish to move them. > - */ > - if (needs_pd_load_post(engine, to, hw_flags)) { > - trace_switch_mm(engine, to); > - ret = to->ppgtt->switch_mm(to->ppgtt, req); > - /* The hardware context switch is emitted, but we haven't > - * actually changed the state - so it's probably safe to bail > - * here. Still, let the user know something dangerous has > - * happened. > - */ > + if (to != from || (hw_flags & MI_FORCE_RESTORE)) { > + ret = mi_set_context(req, hw_flags); > if (ret) { > - DRM_ERROR("Failed to change address space on context switch\n"); > + /* Force the reload of this and the previous mm */ > + if (needs_pd_load_pre(engine, to)) > + to->ppgtt->pd_dirty_rings |= 1 << RCS; > + if (from && needs_pd_load_pre(engine, from)) > + from->ppgtt->pd_dirty_rings |= 1 << RCS; > goto unpin_out; > } > } > > - for (i = 0; i < MAX_L3_SLICES; i++) { > - if (!(to->remap_slice & (1<<i))) > - continue; > - > - ret = i915_gem_l3_remap(req, i); > - /* If it failed, try again next round */ > - if (ret) > - DRM_DEBUG_DRIVER("L3 remapping failed\n"); > - else > - to->remap_slice &= ~(1<<i); > - } > - > /* The backing object for the context is done after switching to the > * *next* context. 
Therefore we cannot retire the previous context until > * the next context has already started running. In fact, the below code > @@ -786,19 +760,46 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) > i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state); > i915_gem_context_unreference(from); > } > - > - uninitialized = !to->legacy_hw_ctx.initialized; > - to->legacy_hw_ctx.initialized = true; > - > i915_gem_context_reference(to); > engine->last_context = to; > > - if (uninitialized) { > + /* GEN8 does *not* require an explicit reload if the PDPs have been > + * setup, and we do not wish to move them. > + */ > + if (needs_pd_load_post(to, hw_flags)) { > + trace_switch_mm(engine, to); > + ret = to->ppgtt->switch_mm(to->ppgtt, req); > + /* The hardware context switch is emitted, but we haven't > + * actually changed the state - so it's probably safe to bail > + * here. Still, let the user know something dangerous has > + * happened. > + */ > + if (ret) > + return ret; > + > + to->ppgtt->pd_dirty_rings &= ~(1 << RCS); > + } > + if (hw_flags & MI_FORCE_RESTORE) > + to->ppgtt->pd_dirty_rings &= ~(1 << RCS); > + > + for (i = 0; i < MAX_L3_SLICES; i++) { > + if (!(to->remap_slice & (1<<i))) > + continue; > + > + ret = i915_gem_l3_remap(req, i); > + if (ret) > + return ret; > + > + to->remap_slice &= ~(1<<i); > + } > + > + if (!to->legacy_hw_ctx.initialized) { > if (engine->init_context) { > ret = engine->init_context(req); > if (ret) > - DRM_ERROR("ring init context: %d\n", ret); > + return ret; > } > + to->legacy_hw_ctx.initialized = true; Otherwise Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx> > } > > return 0; > -- > 2.8.0.rc3 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx