[PATCH 11/11] drm/i915: Allow userspace to request an object at a specific offset

On Tue, 2013-01-08 at 10:53 +0000, Chris Wilson wrote:
> Certain workarounds and workloads require objects at specific or at
> least known offsets. Privileged users could pin an object into the GTT,
> but that has obvious limitations for the general case. Instead, the user
> can construct a batch assuming a particular layout for an object and
> request that the kernel try its utmost to provide the object at that
> location. This has the advantage that not only can it fail, but also
> such allocations are transitory - although contention should be rare and
> the object should persist at the same location between batches. The benefit for
> userspace is that it can then avoid all relocations referencing this
> object as it resides at a known space - this becomes even more useful
> with per-process GTT spaces where there will be virtually no contention
> between applications.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    8 ++
>  drivers/gpu/drm/i915/i915_gem.c            |   10 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 +++++++++++++++++++++++++++-
>  include/uapi/drm/i915_drm.h                |    3 +-
>  4 files changed, 151 insertions(+), 9 deletions(-)
> 
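A note on the intended userspace flow described in the commit message: the
object entry passed to execbuffer2 would carry the requested offset, so the
batch can reference the object without any relocation entries. The snippet
below is only a sketch against the existing drm_i915_gem_exec_object2
layout; the include/uapi/drm/i915_drm.h hunk is not quoted here, so the
EXEC_OBJECT_PINNED name and value, as well as the helper name, are
assumptions for illustration only.

#include <string.h>
#include <drm/i915_drm.h>

#ifndef EXEC_OBJECT_PINNED
#define EXEC_OBJECT_PINNED (1 << 2)	/* assumed flag name and value */
#endif

/* Sketch: ask for 'handle' to be placed at 'gtt_offset' and skip
 * relocations for it, assuming the batch already encodes that address. */
static void setup_pinned_exec_object(struct drm_i915_gem_exec_object2 *exec,
				     __u32 handle, __u64 gtt_offset)
{
	memset(exec, 0, sizeof(*exec));
	exec->handle = handle;
	exec->offset = gtt_offset;		/* requested GTT offset */
	exec->flags = EXEC_OBJECT_PINNED;	/* assumed flag from this series */
	exec->relocation_count = 0;		/* batch built against gtt_offset */
	exec->relocs_ptr = 0;
}

Since the request is allowed to fail and the placement is transitory,
userspace should still check the offset written back by execbuffer2 rather
than assuming the object stayed where it was asked to go.
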
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 97e2049..7da4953 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1429,6 +1429,14 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
>  
> +uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
> +				    uint32_t size, int tiling_mode);
> +uint32_t i915_gem_get_gtt_size(struct drm_device *dev,
> +			       uint32_t size, int tiling_mode);
> +bool i915_gem_valid_gtt_space(struct drm_device *dev,
> +			      struct drm_mm_node *gtt_space,
> +			      unsigned long cache_level);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b7661e1..f1a23bb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1435,7 +1435,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	obj->fault_mappable = false;
>  }
>  
> -static uint32_t
> +uint32_t
>  i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
>  {
>  	uint32_t gtt_size;
> @@ -1463,7 +1463,7 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
>   * Return the required GTT alignment for an object, taking into account
>   * potential fence register mapping.
>   */
> -static uint32_t
> +uint32_t
>  i915_gem_get_gtt_alignment(struct drm_device *dev,
>  			   uint32_t size,
>  			   int tiling_mode)
> @@ -2833,9 +2833,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  	return 0;
>  }
>  
> -static bool i915_gem_valid_gtt_space(struct drm_device *dev,
> -				     struct drm_mm_node *gtt_space,
> -				     unsigned long cache_level)
> +bool i915_gem_valid_gtt_space(struct drm_device *dev,
> +			      struct drm_mm_node *gtt_space,
> +			      unsigned long cache_level)
>  {
>  	struct drm_mm_node *other;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index f6bd92c..bb8b0d6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -403,6 +403,126 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
>  	return ret;
>  }
>  
> +static struct drm_mm_node *
> +get_pinned_block(struct drm_i915_gem_object *obj, u32 size)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_mm_node *gtt;
> +
> +	gtt = drm_mm_create_block(&dev_priv->mm.gtt_space,
> +				  obj->exec_entry->offset,
> +				  size,
> +				  false);
> +	if (gtt == NULL)
> +		return NULL;
> +
> +	if (!i915_gem_valid_gtt_space(dev, gtt, obj->cache_level)) {
> +		drm_mm_put_block(gtt);
> +		return NULL;
> +	}
> +
> +	gtt->color = obj->cache_level;
> +	return gtt;
> +}
> +
> +static int
> +__i915_gem_evict_range(struct drm_device *dev, u32 start, u32 end, u32 color)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_object *obj, *next;
> +
> +	list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, gtt_list) {
> +		u32 node_start = obj->gtt_space->start;
> +		u32 node_end = obj->gtt_space->start + obj->gtt_space->size;
> +
> +		if (!HAS_LLC(dev)) {
> +			if (node_end <= start && obj->tiling_mode != color)
> +				node_end += 4096;
> +			if (node_start >= end && obj->tiling_mode != color)
> +				node_start -= 4096;
> +		}
> +
> +		if (node_end > start && node_start < end) {
> +			int ret = i915_gem_object_unbind(obj);
> +			if (ret)
> +				return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i915_gem_execbuffer_pinned_object(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> +	struct drm_mm_node *gtt;
> +	bool fenceable;
> +	u32 size;
> +	int ret;
> +
> +	if (entry->alignment && entry->offset & (entry->alignment - 1))
> +		return -EINVAL;
> +
> +	if (obj->gtt_offset == entry->offset)
> +		return 0;
> +
> +	if (entry->offset & (i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) {
> +		fenceable = false;
> +		if (entry->offset & (i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1))
> +			return -EINVAL;
> +	}
> +
> +	i915_gem_object_pin_pages(obj);
> +
> +	ret = i915_gem_object_unbind(obj);
> +	if (ret)
> +		goto unpin_pages;
> +
> +	size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode);
> +	gtt = get_pinned_block(obj, size);
> +	if (gtt == NULL) {
> +		ret = __i915_gem_evict_range(dev,
> +					     entry->offset,
> +					     entry->offset + size,
> +					     obj->tiling_mode);

A typo as discussed on IRC: tiling_mode should be cache_level here, since
the eviction needs the object's cache level (the drm_mm colour), not its
tiling mode. The same goes for the color comparison inside
__i915_gem_evict_range(). Roughly, I'd expect the corrected code to look
like the following (only a sketch, everything else as quoted above):
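
	size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode);
	gtt = get_pinned_block(obj, size);
	if (gtt == NULL) {
		/* pass the caching colour, not the tiling mode */
		ret = __i915_gem_evict_range(dev,
					     entry->offset,
					     entry->offset + size,
					     obj->cache_level);

and, correspondingly, inside __i915_gem_evict_range():

		if (!HAS_LLC(dev)) {
			/* pad the range when the neighbour's cache level
			 * differs from the colour being placed */
			if (node_end <= start && obj->cache_level != color)
				node_end += 4096;
			if (node_start >= end && obj->cache_level != color)
				node_start -= 4096;
		}

Otherwise on the series: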

Reviewed-by: Imre Deak <imre.deak at intel.com>

--Imre



