On Thu, Jun 27, 2013 at 04:31:02PM -0700, Ben Widawsky wrote: > This requires doing an actual switch of the page tables during the > context switch/execbuf. > > Along the way, cut away as much "aliasing" ppgtt as possible > > Signed-off-by: Ben Widawsky <ben at bwidawsk.net> > --- > drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++--------- > drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++++++------------ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++------- > 3 files changed, 50 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index af0150e..f05d585 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2170,7 +2170,10 @@ request_to_vm(struct drm_i915_gem_request *request) > struct drm_i915_private *dev_priv = request->ring->dev->dev_private; > struct i915_address_space *vm; > > - vm = &dev_priv->gtt.base; > + if (request->ctx) > + vm = &request->ctx->ppgtt.base; > + else > + vm = &dev_priv->gtt.base; > > return vm; > } > @@ -2676,10 +2679,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj, > > if (obj->has_global_gtt_mapping && is_i915_ggtt(vm)) > i915_gem_gtt_unbind_object(obj); > - if (obj->has_aliasing_ppgtt_mapping) { > - i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj); > - obj->has_aliasing_ppgtt_mapping = 0; > - } > + > + vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > + obj->base.size >> PAGE_SHIFT); > + > i915_gem_gtt_finish_object(obj); > i915_gem_object_unpin_pages(obj); > > @@ -3444,11 +3447,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > return ret; > } > > - if (obj->has_global_gtt_mapping) > + if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping) > i915_gem_gtt_bind_object(obj, cache_level); > - if (obj->has_aliasing_ppgtt_mapping) > - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, > - obj, cache_level); > + > + vm->insert_entries(vm, obj->pages, > + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > + cache_level); > > i915_gem_obj_set_color(obj, vm, cache_level); > } > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 37ebfa2..cea036e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -526,10 +526,14 @@ static int do_switch(struct intel_ring_buffer *ring, > if (from == to && from->last_ring == ring) > return 0; > > + ret = to->ppgtt.switch_mm(&to->ppgtt, ring); > + if (ret) > + return ret; > + > if (ring != &dev_priv->ring[RCS] && from) { > ret = i915_add_request(ring, NULL); > if (ret) > - return ret; > + goto err_out; > i915_gem_context_unreference(from); > } > > @@ -538,7 +542,7 @@ static int do_switch(struct intel_ring_buffer *ring, > > ret = i915_gem_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false); > if (ret) > - return ret; > + goto err_out; > > /* Clear this page out of any CPU caches for coherent swap-in/out. Note > * that thanks to write = false in this call and us not setting any gpu > @@ -546,10 +550,8 @@ static int do_switch(struct intel_ring_buffer *ring, > * (when switching away from it), this won't block. > * XXX: We need a real interface to do this instead of trickery. */ > ret = i915_gem_object_set_to_gtt_domain(to->obj, false); > - if (ret) { > - i915_gem_object_unpin(to->obj); > - return ret; > - } > + if (ret) > + goto unpin_out; > > if (!to->obj->has_global_gtt_mapping) > i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); > @@ -560,10 +562,8 @@ static int do_switch(struct intel_ring_buffer *ring, > hw_flags |= MI_FORCE_RESTORE; > > ret = mi_set_context(ring, to, hw_flags); > - if (ret) { > - i915_gem_object_unpin(to->obj); > - return ret; > - } > + if (ret) > + goto unpin_out; > > /* The backing object for the context is done after switching to the > * *next* context. Therefore we cannot retire the previous context until > @@ -593,7 +593,7 @@ static int do_switch(struct intel_ring_buffer *ring, > * scream. > */ > WARN_ON(mi_set_context(ring, from, MI_RESTORE_INHIBIT)); > - return ret; > + goto err_out; > } > > i915_gem_object_unpin(from->obj); > @@ -605,8 +605,13 @@ done: > ring->last_context = to; > to->is_initialized = true; > to->last_ring = ring; > - > return 0; > + > +unpin_out: > + i915_gem_object_unpin(to->obj); > +err_out: > + WARN_ON(from->ppgtt.switch_mm(&from->ppgtt, ring)); > + return ret; > } > > /** > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index aeec8c0..0f6bf3c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -437,11 +437,21 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, > } > > /* Ensure ppgtt mapping exists if needed */ > - if (dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { > - i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt, > - obj, obj->cache_level); > - > + if (is_i915_ggtt(vm) && > + dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { > + /* FIXME: remove this later */ > + struct i915_address_space *appgtt = > + &dev_priv->gtt.aliasing_ppgtt->base; > + unsigned long obj_offset = i915_gem_obj_offset(obj, appgtt); > + > + appgtt->insert_entries(appgtt, obj->pages, > + obj_offset >> PAGE_SHIFT, > + obj->cache_level); > I meant to remove this, but I missed it. In theory I don't ever want to insert Aliasing PPGTT PTEs. Will remove it locally and test it now. > > obj->has_aliasing_ppgtt_mapping = 1; > + } else { > + vm->insert_entries(vm, obj->pages, > + i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT, > + obj->cache_level); > } > > if (entry->offset != i915_gem_obj_offset(obj, vm)) { > @@ -864,7 +874,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > struct i915_hw_context *ctx; > struct i915_address_space *vm; > u32 ctx_id = i915_execbuffer2_get_context_id(*args); > - u32 exec_start, exec_len; > + u32 exec_start = args->batch_start_offset, exec_len; > u32 mask, flags; > int ret, mode, i; > bool need_relocs; > @@ -1085,8 +1095,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > goto err; > } > > - exec_start = i915_gem_obj_offset(batch_obj, vm) + > - args->batch_start_offset; > + if (batch_obj->has_global_gtt_mapping) > + exec_start += i915_gem_ggtt_offset(batch_obj); > + else > + exec_start += i915_gem_obj_offset(batch_obj, vm); > + > exec_len = args->batch_len; > if (cliprects) { > for (i = 0; i < args->num_cliprects; i++) { > -- > 1.8.3.1 > -- Ben Widawsky, Intel Open Source Technology Center