[PATCH] drm/i915: Introduce a new create ioctl for user specified placement

On Fri, Jul 05, 2013 at 02:42:04PM +0100, Chris Wilson wrote:
> Despite being a unified memory architecture (UMA) some bits of memory
> are more equal than others. In particular we have the thorny issue of
> stolen memory, memory stolen from the system by the BIOS and reserved
> for igfx use. Stolen memory is required for some functions of the GPU
> and display engine, but in general it goes wasted. Whilst we cannot
> return it to the system, we need to find some other method for
> utilising it. As we do not support direct access to the physical address
> in the stolen region, it behaves like a different class of memory,
> closer in kind to local GPU memory. This strongly suggests that we need a
> placement model like TTM if we are to fully utilize these discrete
> chunks of differing memory.
> 
> This new create ioctl therefore exists to allow the user to create these
> second class buffer objects from stolen memory. At the moment direct
> access by the CPU through mmaps and pread/pwrite is verboten on the
> objects, so the user must be aware of the limitations of the objects
> created. Yet those limitations rarely reduce the desired functionality
> in many use cases, so the user should be able to fill the stolen memory
> easily and thereby help reduce overall memory pressure.
> 
> The most obvious use case for stolen memory is for the creation of objects
> for the display engine which already have very similar restrictions on
> access. However, we want a reasonably general ioctl in order to cater
> for diverse scenarios beyond the author's imagination.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

Looks like a sane idea. But I think we should call the new placement flags
placements, not domains, since imo they are a rather different concept
from what we currently call domains.

I also wonder whether we should have some form of initial domain (for
people who want to do the clflushing up-front, or in case we ever get a
clflushed page-cache), but the spare bits in flags should be good enough.
Or maybe keep the domain ioctl struct member and move the placement
defines to the high bits, along the lines of the purely illustrative
sketch below.
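
For illustration only, one possible split of that field (all of these names
and bit positions are invented here and are not part of the posted patch):

#define I915_CREATE2_INITIAL_DOMAIN_MASK	0x0000ffff /* optional initial read/write domain */
#define I915_CREATE2_PLACEMENT_SHIFT		28
#define I915_CREATE2_PLACEMENT_SYSTEM		(0u << I915_CREATE2_PLACEMENT_SHIFT)
#define I915_CREATE2_PLACEMENT_STOLEN		(1u << I915_CREATE2_PLACEMENT_SHIFT)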

And I want i-g-t tests: both basic functionality tests and checks that
all the verboten stuff is indeed forbidden, roughly along the lines of
the sketch below.
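
Roughly, something like this (a standalone sketch, not actual i-g-t code;
it open-codes the create2 struct and ioctl number from this patch since the
header is not upstream, assumes /dev/dri/card0 is the i915 device, and only
checks that pread fails without guessing the exact errno):

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <drm/drm.h>
#include <drm/i915_drm.h>

/* Local copy of the uapi proposed in this patch. */
struct local_i915_gem_create2 {
	__u64 size;
	__u32 domain;
#define LOCAL_I915_CREATE_DOMAIN_SYSTEM 0
#define LOCAL_I915_CREATE_DOMAIN_STOLEN 1
	__u32 caching;
	__u32 tiling_mode;
	__u32 stride;
	__u32 flags;
	__u32 handle;
};
#define LOCAL_DRM_I915_GEM_CREATE2 0x32
#define LOCAL_IOCTL_I915_GEM_CREATE2 \
	DRM_IOWR(DRM_COMMAND_BASE + LOCAL_DRM_I915_GEM_CREATE2, \
		 struct local_i915_gem_create2)

int main(void)
{
	struct local_i915_gem_create2 create;
	struct drm_i915_gem_pread pread;
	char buf[4096];
	int fd, ret;

	fd = open("/dev/dri/card0", O_RDWR);
	assert(fd >= 0);

	/* Basic functionality: a page-sized, untiled, uncached stolen object. */
	memset(&create, 0, sizeof(create));
	create.size = 4096;
	create.domain = LOCAL_I915_CREATE_DOMAIN_STOLEN;
	create.caching = I915_CACHING_NONE;
	create.tiling_mode = I915_TILING_NONE;
	ret = ioctl(fd, LOCAL_IOCTL_I915_GEM_CREATE2, &create);
	assert(ret == 0 && create.handle != 0);

	/* The verboten bit: CPU pread on a stolen object must be rejected. */
	memset(&pread, 0, sizeof(pread));
	pread.handle = create.handle;
	pread.size = sizeof(buf);
	pread.data_ptr = (uintptr_t)buf;
	ret = ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
	assert(ret == -1); /* exact errno is up to the pread path */

	close(fd);
	return 0;
}
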
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |   5 +-
>  drivers/gpu/drm/i915/i915_drv.h        |  15 +++--
>  drivers/gpu/drm/i915/i915_gem.c        |  69 +++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_tiling.c | 107 ++++++++++++++++++---------------
>  include/uapi/drm/i915_drm.h            |  55 +++++++++++++++++
>  5 files changed, 194 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 0e22142..0c7299d 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1876,8 +1876,8 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED),
> -	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED),
> -	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
> @@ -1889,6 +1889,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_AUTH|DRM_UNLOCKED),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dc7475d..cb54a66 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1646,6 +1646,8 @@ int i915_gem_init_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  int i915_gem_create_ioctl(struct drm_device *dev, void *data,
>  			  struct drm_file *file_priv);
> +int i915_gem_create2_ioctl(struct drm_device *dev, void *data,
> +			   struct drm_file *file_priv);
>  int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>  			 struct drm_file *file_priv);
>  int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> @@ -1680,10 +1682,10 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv);
>  int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv);
> -int i915_gem_set_tiling(struct drm_device *dev, void *data,
> -			struct drm_file *file_priv);
> -int i915_gem_get_tiling(struct drm_device *dev, void *data,
> -			struct drm_file *file_priv);
> +int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *file_priv);
> +int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *file_priv);
>  int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  				struct drm_file *file_priv);
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
> @@ -1797,6 +1799,8 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
>  					    uint32_t read_domains,
>  					    uint32_t write_domain);
> +int __must_check i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
> +					    int tiling_mode, int pitch);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int __must_check i915_gem_init_hw(struct drm_device *dev);
> @@ -1832,6 +1836,9 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
>  void i915_gem_free_all_phys_object(struct drm_device *dev);
>  void i915_gem_release(struct drm_device *dev, struct drm_file *file);
>  
> +bool
> +i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode);
> +
>  uint32_t
>  i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
>  uint32_t
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 3ea54c8..ac24bfb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -242,8 +242,7 @@ i915_gem_dumb_create(struct drm_file *file,
>  	/* have to work out size/pitch and return them */
>  	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
>  	args->size = args->pitch * args->height;
> -	return i915_gem_create(file, dev,
> -			       args->size, &args->handle);
> +	return i915_gem_create(file, dev, args->size, &args->handle);
>  }
>  
>  int i915_gem_dumb_destroy(struct drm_file *file,
> @@ -261,9 +260,71 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
>  		      struct drm_file *file)
>  {
>  	struct drm_i915_gem_create *args = data;
> +	return i915_gem_create(file, dev, args->size, &args->handle);
> +}
> +
> +int
> +i915_gem_create2_ioctl(struct drm_device *dev, void *data,
> +		       struct drm_file *file)
> +{
> +	struct drm_i915_gem_create2 *args = data;
> +	struct drm_i915_gem_object *obj;
> +	unsigned cache_level;
> +	int ret;
> +
> +	if (args->flags & ~(I915_CREATE_FLAG_INEXACT_DOMAIN))
> +		return -EINVAL;
> +
> +	if (!i915_tiling_ok(dev,
> +			    args->stride, args->size, args->tiling_mode))
> +		return -EINVAL;
> +
> +	switch (args->caching) {
> +	case I915_CACHING_NONE:
> +		cache_level = I915_CACHE_NONE;
> +		break;
> +	case I915_CACHING_CACHED:
> +		cache_level = I915_CACHE_LLC;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (args->size == 0 || args->size & 4095)
> +		return -EINVAL;
> +
> +	obj = NULL;
> +	switch (args->domain) {
> +	case I915_CREATE_DOMAIN_SYSTEM:
> +		obj = i915_gem_alloc_object(dev, args->size);
> +		break;
> +	case I915_CREATE_DOMAIN_STOLEN:
> +		obj = i915_gem_object_create_stolen(dev, args->size);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	ret =  i915_gem_object_set_cache_level(obj, cache_level);
> +	if (ret)
> +		goto out;
>  
> -	return i915_gem_create(file, dev,
> -			       args->size, &args->handle);
> +	ret = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
> +	if (ret)
> +		goto out;
> +
> +	ret = drm_gem_handle_create(file, &obj->base, &args->handle);
> +	if (ret)
> +		goto out;
> +
> +	trace_i915_gem_object_create(obj);
> +out:
> +	drm_gem_object_unreference(&obj->base);
> +	mutex_unlock(&dev->struct_mutex);
> +	return ret;
>  }
>  
>  static inline int
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 92a8d27..cbd89f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -201,7 +201,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
>  }
>  
>  /* Check pitch constriants for all chips & tiling formats */
> -static bool
> +bool
>  i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
>  {
>  	int tile_width;
> @@ -285,13 +285,67 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
>  	return true;
>  }
>  
> +int
> +i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
> +			   int tiling_mode, int stride)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	int ret;
> +
> +	if (tiling_mode == obj->tiling_mode && stride == obj->stride)
> +		return 0;
> +
> +	/* We need to rebind the object if its current allocation
> +	 * no longer meets the alignment restrictions for its new
> +	 * tiling mode. Otherwise we can just leave it alone, but
> +	 * need to ensure that any fence register is updated before
> +	 * the next fenced (either through the GTT or by the BLT unit
> +	 * on older GPUs) access.
> +	 *
> +	 * After updating the tiling parameters, we then flag whether
> +	 * we need to update an associated fence register. Note this
> +	 * has to also include the unfenced register the GPU uses
> +	 * whilst executing a fenced command for an untiled object.
> +	 */
> +
> +	obj->map_and_fenceable =
> +		!i915_gem_obj_ggtt_bound(obj) ||
> +		(i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
> +		 i915_gem_object_fence_ok(obj, tiling_mode));
> +
> +	/* Rebind if we need a change of alignment */
> +	ret = 0;
> +	if (!obj->map_and_fenceable) {
> +		u32 unfenced_alignment =
> +			i915_gem_get_gtt_alignment(dev_priv->dev,
> +						   obj->base.size, tiling_mode,
> +						   false);
> +		if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1))
> +			ret = i915_gem_object_unbind(obj);
> +	}
> +
> +	if (ret == 0) {
> +		obj->fence_dirty =
> +			obj->fenced_gpu_access ||
> +			obj->fence_reg != I915_FENCE_REG_NONE;
> +
> +		obj->tiling_mode = tiling_mode;
> +		obj->stride = stride;
> +
> +		/* Force the fence to be reacquired for GTT access */
> +		i915_gem_release_mmap(obj);
> +	}
> +
> +	return ret;
> +}
> +
>  /**
>   * Sets the tiling mode of an object, returning the required swizzling of
>   * bit 6 of addresses in the object.
>   */
>  int
> -i915_gem_set_tiling(struct drm_device *dev, void *data,
> -		   struct drm_file *file)
> +i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
> +			  struct drm_file *file)
>  {
>  	struct drm_i915_gem_set_tiling *args = data;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> @@ -343,48 +397,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>  	}
>  
>  	mutex_lock(&dev->struct_mutex);
> -	if (args->tiling_mode != obj->tiling_mode ||
> -	    args->stride != obj->stride) {
> -		/* We need to rebind the object if its current allocation
> -		 * no longer meets the alignment restrictions for its new
> -		 * tiling mode. Otherwise we can just leave it alone, but
> -		 * need to ensure that any fence register is updated before
> -		 * the next fenced (either through the GTT or by the BLT unit
> -		 * on older GPUs) access.
> -		 *
> -		 * After updating the tiling parameters, we then flag whether
> -		 * we need to update an associated fence register. Note this
> -		 * has to also include the unfenced register the GPU uses
> -		 * whilst executing a fenced command for an untiled object.
> -		 */
> -
> -		obj->map_and_fenceable =
> -			!i915_gem_obj_ggtt_bound(obj) ||
> -			(i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
> -			 i915_gem_object_fence_ok(obj, args->tiling_mode));
> -
> -		/* Rebind if we need a change of alignment */
> -		if (!obj->map_and_fenceable) {
> -			u32 unfenced_alignment =
> -				i915_gem_get_gtt_alignment(dev, obj->base.size,
> -							    args->tiling_mode,
> -							    false);
> -			if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1))
> -				ret = i915_gem_object_unbind(obj);
> -		}
> -
> -		if (ret == 0) {
> -			obj->fence_dirty =
> -				obj->fenced_gpu_access ||
> -				obj->fence_reg != I915_FENCE_REG_NONE;
> -
> -			obj->tiling_mode = args->tiling_mode;
> -			obj->stride = args->stride;
> -
> -			/* Force the fence to be reacquired for GTT access */
> -			i915_gem_release_mmap(obj);
> -		}
> -	}
> +	ret = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
>  	/* we have to maintain this existing ABI... */
>  	args->stride = obj->stride;
>  	args->tiling_mode = obj->tiling_mode;
> @@ -410,8 +423,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>   * Returns the current tiling mode and required bit 6 swizzling for the object.
>   */
>  int
> -i915_gem_get_tiling(struct drm_device *dev, void *data,
> -		   struct drm_file *file)
> +i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
> +			  struct drm_file *file)
>  {
>  	struct drm_i915_gem_get_tiling *args = data;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 923ed7f..b3b21fb 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_SET_CACHING	0x2f
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
> +#define DRM_I915_GEM_CREATE2		0x32
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -228,6 +229,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_ENTERVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
>  #define DRM_IOCTL_I915_GEM_LEAVEVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
>  #define DRM_IOCTL_I915_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
> +#define DRM_IOCTL_I915_GEM_CREATE2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE2, struct drm_i915_gem_create2)
>  #define DRM_IOCTL_I915_GEM_PREAD	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
>  #define DRM_IOCTL_I915_GEM_PWRITE	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
>  #define DRM_IOCTL_I915_GEM_MMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
> @@ -407,6 +409,59 @@ struct drm_i915_gem_create {
>  	__u32 pad;
>  };
>  
> +struct drm_i915_gem_create2 {
> +	/**
> +	 * Requested size for the object.
> +	 *
> +	 * The (page-aligned) allocated size for the object will be returned.
> +	 */
> +	__u64 size;
> +
> +	/**
> +	 * Requested placement or memory domain
> +	 *
> +	 * You can request that the object be created from special memory
> +	 * rather than regular system pages. Such irregular objects may
> +	 * have certain restrictions (such as CPU access to a stolen
> +	 * object is verboten).
> +	 */
> +	__u32 domain;
> +#define I915_CREATE_DOMAIN_SYSTEM 0
> +#define I915_CREATE_DOMAIN_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */
> +	/**
> +	 * Requested cache level.
> +	 *
> +	 * See DRM_IOCTL_I915_GEM_SET_CACHING
> +	 */
> +	__u32 caching;
> +
> +	/**
> +	 * Requested tiling mode.
> +	 *
> +	 * See DRM_IOCTL_I915_GEM_SET_TILING
> +	 */
> +	__u32 tiling_mode;
> +	/**
> +	 * Requested stride for tiling.
> +	 *
> +	 * See DRM_IOCTL_I915_GEM_SET_TILING
> +	 */
> +	__u32 stride;
> +
> +	/**
> +	 * Additional miscellaneous flags
> +	 *
> +	 */
> +	__u32 flags;
> +
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
> +
>  struct drm_i915_gem_pread {
>  	/** Handle for the object being read. */
>  	__u32 handle;
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

