On Tue, 2013-01-08 at 10:53 +0000, Chris Wilson wrote: > Certain workarounds and workloads require objects at specific or at > least known offsets. Privileged users could pin an object into the GTT, > but that has obvious limitations for the general case. Instead, the user > can construct a batch assuming a particular layout for an object and > request that the kernel try its utmost to provide the object at that > location. This has the advantage that not only can it fail, but also > such allocations are transitory - although contention should be rare and > the object persist at the same location between batches. The benefit for > userspace is that it can then avoid all relocations referencing this > object as it resides at a known space - this becomes even more useful > with per-process GTT spaces where there will be virtually no contention > between applications. > > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_drv.h | 8 ++ > drivers/gpu/drm/i915/i915_gem.c | 10 +- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 139 +++++++++++++++++++++++++++- > include/uapi/drm/i915_drm.h | 3 +- > 4 files changed, 151 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 97e2049..7da4953 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1429,6 +1429,14 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, > size_t size); > void i915_gem_free_object(struct drm_gem_object *obj); > > +uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev, > + uint32_t size, int tiling_mode); > +uint32_t i915_gem_get_gtt_size(struct drm_device *dev, > + uint32_t size, int tiling_mode); > +bool i915_gem_valid_gtt_space(struct drm_device *dev, > + struct drm_mm_node *gtt_space, > + unsigned long cache_level); > + > int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, > uint32_t alignment, > bool 
map_and_fenceable, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index b7661e1..f1a23bb 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1435,7 +1435,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) > obj->fault_mappable = false; > } > > -static uint32_t > +uint32_t > i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) > { > uint32_t gtt_size; > @@ -1463,7 +1463,7 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) > * Return the required GTT alignment for an object, taking into account > * potential fence register mapping. > */ > -static uint32_t > +uint32_t > i915_gem_get_gtt_alignment(struct drm_device *dev, > uint32_t size, > int tiling_mode) > @@ -2833,9 +2833,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) > return 0; > } > > -static bool i915_gem_valid_gtt_space(struct drm_device *dev, > - struct drm_mm_node *gtt_space, > - unsigned long cache_level) > +bool i915_gem_valid_gtt_space(struct drm_device *dev, > + struct drm_mm_node *gtt_space, > + unsigned long cache_level) > { > struct drm_mm_node *other; > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index f6bd92c..bb8b0d6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -403,6 +403,126 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, > return ret; > } > > +static struct drm_mm_node * > +get_pinned_block(struct drm_i915_gem_object *obj, u32 size) > +{ > + struct drm_device *dev = obj->base.dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_mm_node *gtt; > + > + gtt = drm_mm_create_block(&dev_priv->mm.gtt_space, > + obj->exec_entry->offset, > + size, > + false); > + if (gtt == NULL) > + return NULL; > + > + if (!i915_gem_valid_gtt_space(dev, gtt, obj->cache_level)) { > + drm_mm_put_block(gtt); 
> + return NULL; > + } > + > + gtt->color = obj->cache_level; > + return gtt; > +} > + > +static int > +__i915_gem_evict_range(struct drm_device *dev, u32 start, u32 end, u32 color) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_i915_gem_object *obj, *next; > + > + list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, gtt_list) { > + u32 node_start = obj->gtt_space->start; > + u32 node_end = obj->gtt_space->start + obj->gtt_space->size; > + > + if (!HAS_LLC(dev)) { > + if (node_end <= start && obj->tiling_mode != color) > + node_end += 4096; > + if (node_start >= end && obj->tiling_mode != color) > + node_start -= 4096; > + } > + > + if (node_end > start && node_start < end) { > + int ret = i915_gem_object_unbind(obj); > + if (ret) > + return ret; > + } > + } > + > + return 0; > +} > + > +static int > +i915_gem_execbuffer_pinned_object(struct drm_i915_gem_object *obj) > +{ > + struct drm_device *dev = obj->base.dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; > + struct drm_mm_node *gtt; > + bool fenceable; > + u32 size; > + int ret; > + > + if (entry->alignment && entry->offset & (entry->alignment - 1)) > + return -EINVAL; > + > + if (obj->gtt_offset == entry->offset) > + return 0; > + > + if (entry->offset & (i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) { > + fenceable = false; > + if (entry->offset & (i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) > + return -EINVAL; > + } > + > + i915_gem_object_pin_pages(obj); > + > + ret = i915_gem_object_unbind(obj); > + if (ret) > + goto unpin_pages; > + > + size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode); > + gtt = get_pinned_block(obj, size); > + if (gtt == NULL) { > + ret = __i915_gem_evict_range(dev, > + entry->offset, > + entry->offset + size, > + obj->tiling_mode); A typo as discussed on IRC, tiling_mode should be 
cache_level — the `color` argument passed to __i915_gem_evict_range() here should be the object's cache level, not its tiling mode. The same fix is needed inside __i915_gem_evict_range() itself, where the `obj->tiling_mode != color` comparisons should likewise be against the cache level. Otherwise on the series: Reviewed-by: Imre Deak <imre.deak at intel.com> --Imre