Re: [RFC] libdrm_intel: Add support for userptr objects

Ben Widawsky <ben@xxxxxxxxxxxx> · Fri, 2 May 2014 10:15:30 -0700

On Fri, May 02, 2014 at 11:27:45AM +0100, Tvrtko Ursulin wrote:
> 
> On 05/01/2014 07:47 PM, Ben Widawsky wrote:
> >On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> >>
> >>Allow userptr objects to be created and used via libdrm_intel.
> >>
> >>At the moment tiling and mapping to GTT aperture is not supported
> >>due hardware limitations across different generations and uncertainty
> >>about its usefulness.
> >>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> >>---
> >>  include/drm/i915_drm.h    |  16 +++++
> >>  intel/intel_bufmgr.c      |  13 ++++
> >>  intel/intel_bufmgr.h      |   5 ++
> >>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
> >>  intel/intel_bufmgr_priv.h |  12 +++-
> >>  5 files changed, 198 insertions(+), 2 deletions(-)
> >>
> >>diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> >>index 2f4eb8c..d32ef99 100644
> >>--- a/include/drm/i915_drm.h
> >>+++ b/include/drm/i915_drm.h
> >>@@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
> >>  #define DRM_I915_GEM_GET_CACHING	0x30
> >>  #define DRM_I915_REG_READ		0x31
> >>  #define DRM_I915_GET_RESET_STATS	0x32
> >>+#define DRM_I915_GEM_USERPTR		0x34
> >>
> >>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> >>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> >>@@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
> >>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> >>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> >>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> >>+#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
> >>
> >>  /* Allow drivers to submit batchbuffers directly to hardware, relying
> >>   * on the security mechanisms provided by hardware.
> >>@@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
> >>  	__u64 offset;
> >>  };
> >>
> >>+struct drm_i915_gem_userptr {
> >>+	__u64 user_ptr;
> >>+	__u64 user_size;
> >
> >Adding alignment might be a safe bet.
> 
> Hmmmm, at first I thought you are raising a good point. But then I don't
> understand why I don't see any aligned types in
> linux/include/uapi/drm/i915_drm.h ?

I meant an alignment field. I was thinking if some buffers have weird
alignment requirements for the GPU (but not the CPU) making that info
available to the kernel would be important.

As another potentially lacking field (which I happen not to care about
for the vmap/userptr style usage), we have certain buffers that need to
live within a certain part of the GPU address space.

I should mention that when I wrote the previous email, I didn't have
access to the latest kernel implementation when I sent the mail. I had
been assuming (because I've been wrapped up in my own, different world),
that the address space allocation was done at the time of the IOCTL. It
is not, and therefore, my request for the existing code does not make
sense.

With that though, I should describe my usecase briefly. I want the
ability to carve out the address space at the time of the IOCTL. As
such, I need to know some of these attributes.

> 
> >>+	__u32 flags;
> >>+#define I915_USERPTR_READ_ONLY 0x1
> >
> >I'll eventually want something like:
> >#define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */
> 
> Plenty of free flags. :)
> 
> >>+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> >>+	/**
> >>+	* Returned handle for the object.
> >>+	*
> >>+	* Object handles are nonzero.
> >>+	*/
> >>+	__u32 handle;
> >>+};
> >>+
> >>  struct drm_i915_gem_set_domain {
> >>  	/** Handle for the object */
> >>  	__u32 handle;
> >>diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> >>index 905556f..7f3d795 100644
> >>--- a/intel/intel_bufmgr.c
> >>+++ b/intel/intel_bufmgr.c
> >>@@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
> >>  	return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
> >>  }
> >>
> >>+drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> >>+					  const char *name, void *addr,
> >>+					  uint32_t tiling_mode,
> >>+					  uint32_t stride,
> >>+					  unsigned long size,
> >>+					  unsigned long flags)
> >>+{
> >>+	if (bufmgr->bo_alloc_userptr)
> >>+		return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
> >>+						stride, size, flags);
> >>+	return NULL;
> >>+}
> >>+
> >>  drm_intel_bo *
> >>  drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> >>                          int x, int y, int cpp, uint32_t *tiling_mode,
> >>diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> >>index 9383c72..be83a56 100644
> >>--- a/intel/intel_bufmgr.h
> >>+++ b/intel/intel_bufmgr.h
> >>@@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
> >>  					    const char *name,
> >>  					    unsigned long size,
> >>  					    unsigned int alignment);
> >>+drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> >>+					const char *name,
> >>+					void *addr, uint32_t tiling_mode,
> >>+					uint32_t stride, unsigned long size,
> >>+					unsigned long flags);
> >>  drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
> >>  				       const char *name,
> >>  				       int x, int y, int cpp,
> >>diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> >>index 007a6d8..7cad945 100644
> >>--- a/intel/intel_bufmgr_gem.c
> >>+++ b/intel/intel_bufmgr_gem.c
> >>@@ -182,6 +182,11 @@ struct _drm_intel_bo_gem {
> >>  	void *mem_virtual;
> >>  	/** GTT virtual address for the buffer, saved across map/unmap cycles */
> >>  	void *gtt_virtual;
> >>+	/**
> >>+	 * Virtual address of the buffer allocated by user, used for userptr
> >>+	 * objects only.
> >>+	 */
> >>+	void *user_virtual;
> >
> >Can we not just re-use mem_virtual? Do you intend to provide the ability
> >to have two distinct CPU mappings to the same object?
> 
> Haven't heard anything like that. By quick look at the code seems that
> reusing mem_virtual would require special casing some parts which touch it.
> So it may be it is actually cleaner as it is.
> 

I didn't look too hard, but I think it might *just work* if you use
mem_virtual. I will try it and let you know.

> >>  	int map_count;
> >>  	drmMMListHead vma_list;
> >>
> >>@@ -221,6 +226,11 @@ struct _drm_intel_bo_gem {
> >>  	bool idle;
> >>
> >>  	/**
> >>+	 * Boolean of whether this buffer was allocated with userptr
> >>+	 */
> >>+	bool is_userptr;
> >>+
> >>+	/**
> >>  	 * Size in bytes of this buffer and its relocation descendents.
> >>  	 *
> >>  	 * Used to avoid costly tree walking in
> >>@@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> >>  					       tiling, stride);
> >>  }
> >>
> >>+static drm_intel_bo *
> >>+drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> >>+				const char *name,
> >>+				void *addr,
> >>+				uint32_t tiling_mode,
> >>+				uint32_t stride,
> >>+				unsigned long size,
> >>+				unsigned long flags)
> >
> >I'd vote for dropping tiling_mode, stride. Make size and flags
> >explicitly sized types. Not doing this has bitten us already.
> 
> Some people expressed interest in tiling so I thought to preserve it in the
> API.
> 
> Kernel even allows it, and it is just because Chris found bspec references
> saying it will break badly on some platforms, that I (or maybe it was both
> of us) decided to disable it on this level for time being.
> 
> So I think it is just a matter of coming up with a blacklist and it would be
> possible to use it then.
> 

Well again, maybe this is specific to my usecase, but I can never
imagine a use for such fields at this stage in the buffer's lifecycle.
Could you get "some people" to describe how they want to use it?

> >>+{
> >>+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
> >>+	drm_intel_bo_gem *bo_gem;
> >>+	int ret;
> >>+	struct drm_i915_gem_userptr userptr;
> >>+
> >>+	/* Tiling with userptr surfaces is not supported
> >>+	 * on all hardware so refuse it for time being.
> >>+	 */
> >>+	if (tiling_mode != I915_TILING_NONE)
> >>+		return NULL;
> >>+
> >>+	bo_gem = calloc(1, sizeof(*bo_gem));
> >>+	if (!bo_gem)
> >>+		return NULL;
> >>+
> >>+	bo_gem->bo.size = size;
> >>+
> >>+	VG_CLEAR(userptr);
> >>+	userptr.user_ptr = (__u64)((unsigned long)addr);
> >
> >How are we getting away with non page aligned addresses? I'd vote for
> >moving some of the memalign checks into here if all cases require page
> >aligned.
> 
> Kernel will reject any unaligned address or size in the ioctl.
> 
> >>+	userptr.user_size = size;
> >>+	userptr.flags = flags;
> >>+
> >>+	ret = drmIoctl(bufmgr_gem->fd,
> >>+			DRM_IOCTL_I915_GEM_USERPTR,
> >>+			&userptr);
> >>+	if (ret != 0) {
> >>+		DBG("bo_create_userptr: "
> >>+		    "ioctl failed with user ptr %p size 0x%lx, "
> >>+		    "user flags 0x%lx\n", addr, size, flags);
> >>+		free(bo_gem);
> >>+		return NULL;
> >>+	}
> >>+
> >>+	bo_gem->gem_handle = userptr.handle;
> >>+	bo_gem->bo.handle = bo_gem->gem_handle;
> >>+	bo_gem->bo.bufmgr    = bufmgr;
> >>+	bo_gem->is_userptr   = true;
> >>+	bo_gem->bo.virtual   = addr;
> >>+	/* Save the address provided by user */
> >>+	bo_gem->user_virtual = addr;
> >>+	bo_gem->tiling_mode  = I915_TILING_NONE;
> >>+	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> >>+	bo_gem->stride       = 0;
> >>+
> >>+	DRMINITLISTHEAD(&bo_gem->name_list);
> >>+	DRMINITLISTHEAD(&bo_gem->vma_list);
> >>+
> >>+	bo_gem->name = name;
> >>+	atomic_set(&bo_gem->refcount, 1);
> >>+	bo_gem->validate_index = -1;
> >>+	bo_gem->reloc_tree_fences = 0;
> >>+	bo_gem->used_as_reloc_target = false;
> >>+	bo_gem->has_error = false;
> >>+	bo_gem->reusable = false;
> >
> >TODO for someone: Some room to consolidate this with the other create
> >functions.
> >
> >>+
> >>+	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
> >
> >I'm looking at the code and trying to figure out how this is relevant.
> >It's not introduced by you directly. Just looking at the math it all
> >seems to kind of mix-up aperture and non-aperture usages. Just talking
> >to myself...
> >
> >>+
> >>+	DBG("bo_create_userptr: "
> >>+	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
> >>+		addr, bo_gem->gem_handle, bo_gem->name,
> >>+		size, stride, tiling_mode);
> >>+
> >>+	return &bo_gem->bo;
> >>+}
> >>+
> >>  /**
> >>   * Returns a drm_intel_bo wrapping the given buffer object handle.
> >>   *
> >>@@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> >>  	struct drm_i915_gem_set_domain set_domain;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr) {
> >>+		/* Return the same user ptr */
> >>+		bo->virtual = bo_gem->user_virtual;
> >>+		return 0;
> >>+	}
> >>+
> >>  	pthread_mutex_lock(&bufmgr_gem->lock);
> >>
> >>  	if (bo_gem->map_count++ == 0)
> >>@@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo)
> >>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	if (bo_gem->map_count++ == 0)
> >>  		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
> >>
> >>@@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
> >>
> >>  static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
> >>  {
> >>-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> >>+	drm_intel_bufmgr_gem *bufmgr_gem;
> >
> >I'm missing why this hunk was changed. But, okay.
> 
> Just to ensure NULL bo is handled nicely I guess.
> 
> >>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> >>  	int ret = 0;
> >>
> >>  	if (bo == NULL)
> >>  		return 0;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return 0;
> >>+
> >>+	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> >>+
> >>  	pthread_mutex_lock(&bufmgr_gem->lock);
> >>
> >>  	if (bo_gem->map_count <= 0) {
> >>@@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
> >>  	struct drm_i915_gem_pwrite pwrite;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	VG_CLEAR(pwrite);
> >>  	pwrite.handle = bo_gem->gem_handle;
> >>  	pwrite.offset = offset;
> >>@@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
> >>  	struct drm_i915_gem_pread pread;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	VG_CLEAR(pread);
> >>  	pread.handle = bo_gem->gem_handle;
> >>  	pread.offset = offset;
> >>@@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
> >>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> >>  	int ret;
> >>
> >>+	/* Tiling with userptr surfaces is not supported
> >>+	 * on all hardware so refuse it for time being.
> >>+	 */
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	/* Linear buffers have no stride. By ensuring that we only ever use
> >>  	 * stride 0 with linear buffers, we simplify our code.
> >>  	 */
> >>@@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
> >>  	bo_gem->aub_annotation_count = count;
> >>  }
> >>
> >>+static bool
> >>+has_userptr(int fd)
> >>+{
> >>+	int ret;
> >>+	void *ptr;
> >>+	long pgsz;
> >>+	struct drm_i915_gem_userptr userptr;
> >>+	struct drm_gem_close close_bo;
> >>+
> >>+	pgsz = sysconf(_SC_PAGESIZE);
> >>+	assert(ret > 0);
> >>+
> >>+	ret = posix_memalign(&ptr, pgsz, pgsz);
> >>+	assert(ret == 0);
> >>+
> >>+	memset(&userptr, 0, sizeof(userptr));
> >>+	userptr.user_ptr = (__u64)(unsigned long)ptr;
> >>+	userptr.user_size = pgsz;
> >>+
> >>+retry:
> >>+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
> >>+	if (ret) {
> >>+		if (errno == ENODEV && userptr.flags == 0) {
> >>+			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
> >>+			goto retry;
> >>+		}
> >>+		free(ptr);
> >>+		return false;
> >>+	}
> >>+
> >>+	close_bo.handle = userptr.handle;
> >>+	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
> >>+	assert(ret == 0);
> >>+	free(ptr);
> >>+
> >>+	return true;
> >>+}
> >
> >I think a param for USERPTR is warranted. Or did we decide not to do
> >this already?
> 
> I asked for it, but people with authority said no. It is all very weak,
> fragile and dangerous anyway - param or feature detect like the above.
> 
> We would really need a proper feature detect mechanism, like text based in
> sysfs or something.
> 

I don't see why a param is fragile. Feature detect OTOH is almost always
implemented in a fragile manner.

> >>+
> >>  /**
> >>   * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
> >>   * and manage map buffer objections.
> >>@@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> >>  	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> >>  	bufmgr_gem->has_relaxed_fencing = ret == 0;
> >>
> >>+	if (has_userptr(fd))
> >>+		bufmgr_gem->bufmgr.bo_alloc_userptr =
> >>+			drm_intel_gem_bo_alloc_userptr;
> >>+
> >>  	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
> >>  	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> >>  	bufmgr_gem->has_wait_timeout = ret == 0;
> >>diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
> >>index 2592d42..3aa1abb 100644
> >>--- a/intel/intel_bufmgr_priv.h
> >>+++ b/intel/intel_bufmgr_priv.h
> >>@@ -60,7 +60,17 @@ struct _drm_intel_bufmgr {
> >>  					      const char *name,
> >>  					      unsigned long size,
> >>  					      unsigned int alignment);
> >>-
> >>+	/**
> >>+	 * Allocate a buffer object from an existing user accessible
> >>+	 * address malloc'd with the provided size.
> >>+	 * Alignment is used when mapping to the gtt.
> >>+	 * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
> >>+	 */
> >>+	drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
> >>+					  const char *name, void *addr,
> >>+					  uint32_t tiling_mode, uint32_t stride,
> >>+					  unsigned long size,
> >>+					  unsigned long flags);
> >>  	/**
> >>  	 * Allocate a tiled buffer object.
> >>  	 *
> >
> >Probably don't need the special function pointer yet since I don't think
> >we can yet envision use cases which will require any kind of special
> >handling. A simple has_userptr in bufmgr_gem will probably suffice. But
> >I don't care too much either way.
> 
> What do you mean?

Don't add bo_alloc_userptr to the bufmgr. Just add the prototype to
intel_bufmgr.h.

> 
> >I couldn't spot any real bugs...
> 
> Cool, I am glad there is some interest for this.
> 

I'm pretty close to running this with most of the changes I had asked
you for. I need to see how much of your igt test I can reuse now.

> Regards,
> 
> Tvrtko

-- 
Ben Widawsky, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx