[PATCH] drm/i915: Introduce a new create ioctl for user specified placement

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Despite being a unified memory architecture (UMA) some bits of memory
are more equal than others. In particular we have the thorny issue of
stolen memory, memory stolen from the system by the BIOS and reserved
for igfx use. Stolen memory is required for some functions of the GPU
and display engine, but in general it goes to waste. Whilst we cannot
return it to the system, we need to find some other method for
utilising it. As we do not support direct access to the physical address
in the stolen region, it behaves like a different class of memory,
more akin to local GPU memory. This strongly suggests that we need a
placement model like TTM if we are to fully utilise these discrete
chunks of differing memory.

This new create ioctl therefore exists to allow the user to create these
second-class buffer objects from stolen memory. At the moment, direct
access by the CPU through mmaps and pread/pwrite is verboten on these
objects, and so the user must be aware of the limitations of the objects
created. Yet those limitations rarely reduce the desired functionality,
and so in many use cases the user should be able to easily fill the
stolen memory and thereby help to reduce overall memory pressure.

The most obvious use case for stolen memory is for the creation of objects
for the display engine which already have very similar restrictions on
access. However, we want a reasonably general ioctl in order to cater
for diverse scenarios beyond the author's imagination.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c        |   5 +-
 drivers/gpu/drm/i915/i915_drv.h        |  15 +++--
 drivers/gpu/drm/i915/i915_gem.c        |  69 +++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_tiling.c | 107 ++++++++++++++++++---------------
 include/uapi/drm/i915_drm.h            |  55 +++++++++++++++++
 5 files changed, 194 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0e22142..0c7299d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1876,8 +1876,8 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
@@ -1889,6 +1889,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dc7475d..cb54a66 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1646,6 +1646,8 @@ int i915_gem_init_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv);
+int i915_gem_create2_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
 int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
@@ -1680,10 +1682,10 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
-int i915_gem_set_tiling(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_get_tiling(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
@@ -1797,6 +1799,8 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 					    uint32_t read_domains,
 					    uint32_t write_domain);
+int __must_check i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+					    int tiling_mode, int pitch);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -1832,6 +1836,9 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
 void i915_gem_free_all_phys_object(struct drm_device *dev);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
+bool
+i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode);
+
 uint32_t
 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
 uint32_t
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3ea54c8..ac24bfb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -242,8 +242,7 @@ i915_gem_dumb_create(struct drm_file *file,
 	/* have to work out size/pitch and return them */
 	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
 	args->size = args->pitch * args->height;
-	return i915_gem_create(file, dev,
-			       args->size, &args->handle);
+	return i915_gem_create(file, dev, args->size, &args->handle);
 }
 
 int i915_gem_dumb_destroy(struct drm_file *file,
@@ -261,9 +260,71 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file)
 {
 	struct drm_i915_gem_create *args = data;
+	return i915_gem_create(file, dev, args->size, &args->handle);
+}
+
+int
+i915_gem_create2_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file)
+{
+	struct drm_i915_gem_create2 *args = data;
+	struct drm_i915_gem_object *obj;
+	unsigned cache_level;
+	int ret;
+
+	if (args->flags & ~(I915_CREATE_FLAG_INEXACT_DOMAIN))
+		return -EINVAL;
+
+	if (!i915_tiling_ok(dev,
+			    args->stride, args->size, args->tiling_mode))
+		return -EINVAL;
+
+	switch (args->caching) {
+	case I915_CACHING_NONE:
+		cache_level = I915_CACHE_NONE;
+		break;
+	case I915_CACHING_CACHED:
+		cache_level = I915_CACHE_LLC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (args->size == 0 || args->size & 4095)
+		return -EINVAL;
+
+	obj = NULL;
+	switch (args->domain) {
+	case I915_CREATE_DOMAIN_SYSTEM:
+		obj = i915_gem_alloc_object(dev, args->size);
+		break;
+	case I915_CREATE_DOMAIN_STOLEN:
+		obj = i915_gem_object_create_stolen(dev, args->size);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (obj == NULL)
+		return -ENOMEM;
+
+	mutex_lock(&dev->struct_mutex);
+	ret =  i915_gem_object_set_cache_level(obj, cache_level);
+	if (ret)
+		goto out;
 
-	return i915_gem_create(file, dev,
-			       args->size, &args->handle);
+	ret = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
+	if (ret)
+		goto out;
+
+	ret = drm_gem_handle_create(file, &obj->base, &args->handle);
+	if (ret)
+		goto out;
+
+	trace_i915_gem_object_create(obj);
+out:
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
 }
 
 static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 92a8d27..cbd89f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -201,7 +201,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 }
 
 /* Check pitch constriants for all chips & tiling formats */
-static bool
+bool
 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 {
 	int tile_width;
@@ -285,13 +285,67 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
 	return true;
 }
 
+int
+i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+			   int tiling_mode, int stride)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	int ret;
+
+	if (tiling_mode == obj->tiling_mode && stride == obj->stride)
+		return 0;
+
+	/* We need to rebind the object if its current allocation
+	 * no longer meets the alignment restrictions for its new
+	 * tiling mode. Otherwise we can just leave it alone, but
+	 * need to ensure that any fence register is updated before
+	 * the next fenced (either through the GTT or by the BLT unit
+	 * on older GPUs) access.
+	 *
+	 * After updating the tiling parameters, we then flag whether
+	 * we need to update an associated fence register. Note this
+	 * has to also include the unfenced register the GPU uses
+	 * whilst executing a fenced command for an untiled object.
+	 */
+
+	obj->map_and_fenceable =
+		!i915_gem_obj_ggtt_bound(obj) ||
+		(i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
+		 i915_gem_object_fence_ok(obj, tiling_mode));
+
+	/* Rebind if we need a change of alignment */
+	ret = 0;
+	if (!obj->map_and_fenceable) {
+		u32 unfenced_alignment =
+			i915_gem_get_gtt_alignment(dev_priv->dev,
+						   obj->base.size, tiling_mode,
+						   false);
+		if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1))
+			ret = i915_gem_object_unbind(obj);
+	}
+
+	if (ret == 0) {
+		obj->fence_dirty =
+			obj->fenced_gpu_access ||
+			obj->fence_reg != I915_FENCE_REG_NONE;
+
+		obj->tiling_mode = tiling_mode;
+		obj->stride = stride;
+
+		/* Force the fence to be reacquired for GTT access */
+		i915_gem_release_mmap(obj);
+	}
+
+	return ret;
+}
+
 /**
  * Sets the tiling mode of an object, returning the required swizzling of
  * bit 6 of addresses in the object.
  */
 int
-i915_gem_set_tiling(struct drm_device *dev, void *data,
-		   struct drm_file *file)
+i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
 {
 	struct drm_i915_gem_set_tiling *args = data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -343,48 +397,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	if (args->tiling_mode != obj->tiling_mode ||
-	    args->stride != obj->stride) {
-		/* We need to rebind the object if its current allocation
-		 * no longer meets the alignment restrictions for its new
-		 * tiling mode. Otherwise we can just leave it alone, but
-		 * need to ensure that any fence register is updated before
-		 * the next fenced (either through the GTT or by the BLT unit
-		 * on older GPUs) access.
-		 *
-		 * After updating the tiling parameters, we then flag whether
-		 * we need to update an associated fence register. Note this
-		 * has to also include the unfenced register the GPU uses
-		 * whilst executing a fenced command for an untiled object.
-		 */
-
-		obj->map_and_fenceable =
-			!i915_gem_obj_ggtt_bound(obj) ||
-			(i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
-			 i915_gem_object_fence_ok(obj, args->tiling_mode));
-
-		/* Rebind if we need a change of alignment */
-		if (!obj->map_and_fenceable) {
-			u32 unfenced_alignment =
-				i915_gem_get_gtt_alignment(dev, obj->base.size,
-							    args->tiling_mode,
-							    false);
-			if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1))
-				ret = i915_gem_object_unbind(obj);
-		}
-
-		if (ret == 0) {
-			obj->fence_dirty =
-				obj->fenced_gpu_access ||
-				obj->fence_reg != I915_FENCE_REG_NONE;
-
-			obj->tiling_mode = args->tiling_mode;
-			obj->stride = args->stride;
-
-			/* Force the fence to be reacquired for GTT access */
-			i915_gem_release_mmap(obj);
-		}
-	}
+	ret = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
 	/* we have to maintain this existing ABI... */
 	args->stride = obj->stride;
 	args->tiling_mode = obj->tiling_mode;
@@ -410,8 +423,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
  * Returns the current tiling mode and required bit 6 swizzling for the object.
  */
 int
-i915_gem_get_tiling(struct drm_device *dev, void *data,
-		   struct drm_file *file)
+i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
 {
 	struct drm_i915_gem_get_tiling *args = data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 923ed7f..b3b21fb 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHING	0x2f
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_CREATE2		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -228,6 +229,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_ENTERVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
 #define DRM_IOCTL_I915_GEM_LEAVEVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
 #define DRM_IOCTL_I915_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
+#define DRM_IOCTL_I915_GEM_CREATE2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE2, struct drm_i915_gem_create2)
 #define DRM_IOCTL_I915_GEM_PREAD	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
 #define DRM_IOCTL_I915_GEM_PWRITE	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
 #define DRM_IOCTL_I915_GEM_MMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
@@ -407,6 +409,59 @@ struct drm_i915_gem_create {
 	__u32 pad;
 };
 
+struct drm_i915_gem_create2 {
+	/**
+	 * Requested size for the object.
+	 *
+	 * The (page-aligned) allocated size for the object will be returned.
+	 */
+	__u64 size;
+
+	/**
+	 * Requested placement or memory domain
+	 *
+	 * You can request that the object be created from special memory
+	 * rather than regular system pages. Such irregular objects may
+	 * have certain restrictions (such as CPU access to a stolen
+	 * object is verboten).
+	 */
+	__u32 domain;
+#define I915_CREATE_DOMAIN_SYSTEM 0
+#define I915_CREATE_DOMAIN_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */
+	/**
+	 * Requested cache level.
+	 *
+	 * See DRM_IOCTL_I915_GEM_SET_CACHING
+	 */
+	__u32 caching;
+
+	/**
+	 * Requested tiling mode.
+	 *
+	 * See DRM_IOCTL_I915_GEM_SET_TILING
+	 */
+	__u32 tiling_mode;
+	/**
+	 * Requested stride for tiling.
+	 *
+	 * See DRM_IOCTL_I915_GEM_SET_TILING
+	 */
+	__u32 stride;
+
+	/**
+	 * Additional miscellaneous flags
+	 *
+	 */
+	__u32 flags;
+
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 struct drm_i915_gem_pread {
 	/** Handle for the object being read. */
 	__u32 handle;
-- 
1.8.3.2



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux