On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need to have a mappable object during
> execbuffer to the fore and then reuse that decision by propagating the
> flag through to reservation. As a corollary, before doing the actual
> relocation through the GTT, we can make sure that we do have a GTT
> mapping through which to operate.
>
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Ben Widawsky <benjamin.widawsky@xxxxxxxxx>
> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>

Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete. I've dropped it. And I've added a bit of text to the
commit message to explain why this patch touches map_and_fenceable
logic.
-Daniel
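
A note for readers, since the map_and_fenceable change is easy to miss
in the hunks below: before this patch the flag was optimistically
initialised to true ("no unbind needed on the first bind"); afterwards
it is only true while the object actually holds a fenceable binding in
the mappable aperture of the global GTT. A rough sketch of the
resulting invariant at the two touched call sites (the sketch_*
function names are illustrative, not the driver's):

	/* Sketch only: the invariant this patch establishes. */
	static void sketch_unbind(struct i915_vma *vma)
	{
		/* ... actual teardown elided ... */
		if (i915_is_ggtt(vma->vm))
			vma->obj->map_and_fenceable = false; /* binding gone */
	}

	static int sketch_enable_fence(struct drm_i915_gem_object *obj)
	{
		/* Fence registers act on the mappable aperture, so asking
		 * for one without a mappable binding is now flagged as a
		 * bug rather than silently tolerated. */
		if (WARN_ON(!obj->map_and_fenceable))
			return -EINVAL;
		/* ... fence register allocation elided ... */
		return 0;
	}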
> ---
>  drivers/gpu/drm/i915/i915_gem.c            |  8 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
>  2 files changed, 42 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	vma->unbind_vma(vma);
>
>  	list_del_init(&vma->mm_list);
> -	/* Avoid an unnecessary call to unbind on rebind. */
>  	if (i915_is_ggtt(vma->vm))
> -		obj->map_and_fenceable = true;
> +		obj->map_and_fenceable = false;
>
>  	drm_mm_remove_node(&vma->node);
>  	i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  			return 0;
>  		}
>  	} else if (enable) {
> +		if (WARN_ON(!obj->map_and_fenceable))
> +			return -EINVAL;
> +
>  		reg = i915_find_fence_reg(dev);
>  		if (IS_ERR(reg))
>  			return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
> -	/* Avoid an unnecessary call to unbind on the first bind. */
> -	obj->map_and_fenceable = true;
>
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>
>  #define __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  #define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -		i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_engine_cs *ring,
>  				bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence;
>  	uint64_t flags;
>  	int ret;
>
>  	flags = 0;
> -
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	if (need_fence || need_reloc_mappable(vma))
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>  		flags |= PIN_MAPPABLE;
> -
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>  		flags |= PIN_GLOBAL;
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  }
>
>  static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	bool need_fence, need_mappable;
>
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (entry->relocation_count == 0)
> +		return false;
>
> -	WARN_ON((need_mappable || need_fence) &&
> +	if (!i915_is_ggtt(vma->vm))
> +		return false;
> +
> +	/* See also use_cpu_reloc() */
> +	if (HAS_LLC(vma->obj->base.dev))
> +		return false;
> +
> +	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
> +	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
>  		!i915_is_ggtt(vma->vm));
>
>  	if (entry->alignment &&
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>
> -	if (need_mappable && !obj->map_and_fenceable)
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>  		return true;
>
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
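
Condensed, for ease of review: the mappable decision is now taken
exactly once, while i915_gem_execbuffer_reserve() sorts the object list
(next hunk), and the hunks above merely consume it. Using the same
identifiers as the patch:

	/* 1. Decide once while sorting the execbuf list: a mappable
	 *    GGTT binding is needed for a fence on gen < 4 or for
	 *    relocations written through the GTT aperture. */
	if (need_fence || need_reloc_mappable(vma))
		entry->flags |= __EXEC_OBJECT_NEEDS_MAP;

	/* 2. Reuse the decision when pinning ... */
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
		flags |= PIN_MAPPABLE;

	/* 3. ... and when deciding whether a binding must be redone. */
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
		return true; /* misplaced: unbind, then rebind mappable */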
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
>
> -		if (need_mappable)
> +		if (need_mappable) {
> +			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
> -		else
> +		} else
>  			list_move_tail(&vma->exec_list, &ordered_vmas);
>
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			if (!drm_mm_node_allocated(&vma->node))
>  				continue;
>
> -			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> +			if (eb_vma_misplaced(vma))
>  				ret = i915_vma_unbind(vma);
>  			else
>  				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but bdw mucks it up again. */
> -	if (flags & I915_DISPATCH_SECURE &&
> -	    !batch_obj->has_global_gtt_mapping) {
> -		/* When we have multiple VMs, we'll need to make sure that we
> -		 * allocate space first */
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -		BUG_ON(!vma);
> -		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -	}
> +	if (flags & I915_DISPATCH_SECURE) {
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +		if (ret)
> +			goto err;
>
> -	if (flags & I915_DISPATCH_SECURE)
>  		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -	else
> +	} else
>  		exec_start += i915_gem_obj_offset(batch_obj, vm);
>
>  	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> -					   args, &eb->vmas, batch_obj, exec_start, flags);
> -	if (ret)
> -		goto err;
> +					   args, &eb->vmas, batch_obj, exec_start, flags);
>
> +	if (flags & I915_DISPATCH_SECURE)
> +		i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>  	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
> --
> 2.1.0.rc1
>

--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
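
P.S. on the dropped secure-batch hunk, for anyone following along: the
shape it was reaching for is the usual pin-around-use pattern, pinning
the batch into the global GTT for the duration of the submission and
releasing the pin on success and failure alike. A condensed sketch of
that shape, simplified from the dropped hunk (so not what was merged):

	if (flags & I915_DISPATCH_SECURE) {
		/* Reserve a global-GTT binding that cannot be evicted
		 * out from under the submission. */
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
		if (ret)
			goto err;
		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
	}

	ret = legacy_ringbuffer_submission(dev, file, ring, ctx, args,
					   &eb->vmas, batch_obj,
					   exec_start, flags);

	/* Balance the pin whether or not submission succeeded. */
	if (flags & I915_DISPATCH_SECURE)
		i915_gem_object_ggtt_unpin(batch_obj);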