On Sat, Oct 25, 2014 at 05:21:53PM +0530, akash.goel@xxxxxxxxx wrote: > From: Akash Goel <akash.goel@xxxxxxxxx> > > A new libdrm interface 'drm_intel_gem_bo_map_wc' is provided by this > patch. Through this interface Gfx clients can create write combining > virtual mappings of the Gem object. It will provide the same functionality > of 'mmap_gtt' interface without the constraints of limited aperture space, > but provided clients handle the linear to tile conversion on their own. > This patch is intended for improving the CPU write operation performance, > as with such mapping, writes are almost 50% faster than with mmap_gtt. > Also it avoids the Cache flush after update from CPU side, when object is > passed on to GPU, which will be the case if regular mmap interface is used. > This type of mapping is especially useful in case of sub-region > update, i.e. when only a portion of the object is to be updated. > Also there is support for the unsynchronized version of this interface > named 'drm_intel_gem_bo_map_wc_unsynchronized', through which wc virtual > mappings, but unsynchronized ones, can be created of the Gem object. > To ensure the cache coherency, before using this mapping, the GTT domain has > been reused here. This provides the required Cache flush if the object is in > CPU domain, or synchronization against the concurrent rendering. > > The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been > extended with a new flags field (defaulting to 0 for existing users). In > order for userspace to detect the extended ioctl, a new parameter > I915_PARAM_HAS_EXT_MMAP has been added for versioning the ioctl interface. 
> > v2: Aligned with the v2 of the corresponding kernel patch (Chris) > > Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >From a quick glance at the patch: It looks like you copy/pasted the map() implementation and changed a few things here and there instead of adding a flag to drm_intel_gem_bo_map() and reusing the current code. Can we expose another version of map that takes flags (_WC, _UNSYNCHRONIZED, ...) instead of starting to have every single combination possible? Do we really need a separate mem_wc_virtual? Using _map() or _map_wc() in an exclusive way makes some sense to me. With the introduction of mem_wc_virtual, you left aside the vma cache handling and so we'll never end up unmapping it, drm_intel_gem_bo_free() doesn't unmap it either, ... I'd just expect users to use _unmap(). -- Damien > --- > include/drm/i915_drm.h | 9 ++++ > intel/intel_bufmgr.h | 3 ++ > intel/intel_bufmgr_gem.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 136 insertions(+) > > diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h > index 15dd01d..a91a1d0 100644 > --- a/include/drm/i915_drm.h > +++ b/include/drm/i915_drm.h > @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 > #define I915_PARAM_HAS_WT 27 > #define I915_PARAM_CMD_PARSER_VERSION 28 > +#define I915_PARAM_MMAP_VERSION 29 > > typedef struct drm_i915_getparam { > int param; > @@ -487,6 +488,14 @@ struct drm_i915_gem_mmap { > * This is a fixed-size type for 32/64 compatibility. > */ > __u64 addr_ptr; > + > + /** > + * Flags for extended behaviour. > + * > + * Added in version 2. 
> + */ > + __u64 flags; > +#define I915_MMAP_WC 0x1 > }; > > struct drm_i915_gem_mmap_gtt { > diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h > index be83a56..bda4115 100644 > --- a/intel/intel_bufmgr.h > +++ b/intel/intel_bufmgr.h > @@ -177,6 +177,9 @@ void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, > int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); > int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); > int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); > +int drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo); > +int drm_intel_gem_bo_map_wc(drm_intel_bo *bo); > +int drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo); > > int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); > void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c > index f2f4fea..95c588f 100644 > --- a/intel/intel_bufmgr_gem.c > +++ b/intel/intel_bufmgr_gem.c > @@ -184,6 +184,8 @@ struct _drm_intel_bo_gem { > int reloc_count; > /** Mapped address for the buffer, saved across map/unmap cycles */ > void *mem_virtual; > + /** Uncached Mapped address for the buffer, saved across map/unmap cycles */ > + void *mem_wc_virtual; > /** GTT virtual address for the buffer, saved across map/unmap cycles */ > void *gtt_virtual; > /** > @@ -1267,6 +1269,121 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) > } > } > > +static int > +map_wc(drm_intel_bo *bo) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + int ret; > + > + if (bo_gem->is_userptr) > + return -EINVAL; > + > + if (bo_gem->map_count++ == 0) > + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); > + > + /* Get a mapping of the buffer if we haven't before. 
*/ > + if (bo_gem->mem_wc_virtual == NULL) { > + struct drm_i915_gem_mmap mmap_arg; > + > + DBG("bo_map_wc: mmap %d (%s), map_count=%d\n", > + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); > + > + VG_CLEAR(mmap_arg); > + mmap_arg.handle = bo_gem->gem_handle; > + /* To indicate the uncached virtual mapping to KMD */ > + mmap_arg.flags = I915_MMAP_WC; > + mmap_arg.offset = 0; > + mmap_arg.size = bo->size; > + ret = drmIoctl(bufmgr_gem->fd, > + DRM_IOCTL_I915_GEM_MMAP, > + &mmap_arg); > + if (ret != 0) { > + ret = -errno; > + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", > + __FILE__, __LINE__, bo_gem->gem_handle, > + bo_gem->name, strerror(errno)); > + if (--bo_gem->map_count == 0) > + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); > + pthread_mutex_unlock(&bufmgr_gem->lock); > + return ret; > + } > + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); > + bo_gem->mem_wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; > + } > + > + bo->virtual = bo_gem->mem_wc_virtual; > + > + DBG("bo_map_wc: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, > + bo_gem->mem_wc_virtual); > + > + return 0; > +} > + > +/* To be used in a similar way to mmap_gtt */ > +drm_public int > +drm_intel_gem_bo_map_wc(drm_intel_bo *bo) { > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + struct drm_i915_gem_set_domain set_domain; > + int ret; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + > + ret = map_wc(bo); > + if (ret) { > + pthread_mutex_unlock(&bufmgr_gem->lock); > + return ret; > + } > + > + /* Now move it to the GTT domain so that the GPU and CPU > + * caches are flushed and the GPU isn't actively using the > + * buffer. > + * > + * The domain change is done even for the objects which > + * are not bounded. For them first the pages are acquired, > + * before the domain change. 
> + */ > + VG_CLEAR(set_domain); > + set_domain.handle = bo_gem->gem_handle; > + set_domain.read_domains = I915_GEM_DOMAIN_GTT; > + set_domain.write_domain = I915_GEM_DOMAIN_GTT; > + ret = drmIoctl(bufmgr_gem->fd, > + DRM_IOCTL_I915_GEM_SET_DOMAIN, > + &set_domain); > + if (ret != 0) { > + DBG("%s:%d: Error setting domain %d: %s\n", > + __FILE__, __LINE__, bo_gem->gem_handle, > + strerror(errno)); > + } > + drm_intel_gem_bo_mark_mmaps_incoherent(bo); > + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); > + pthread_mutex_unlock(&bufmgr_gem->lock); > + > + return 0; > +} > + > +drm_public int > +drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo) { > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > +#ifdef HAVE_VALGRIND > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > +#endif > + int ret; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + > + ret = map_wc(bo); > + if (ret == 0) { > + drm_intel_gem_bo_mark_mmaps_incoherent(bo); > + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); > + } > + > + pthread_mutex_unlock(&bufmgr_gem->lock); > + > + return ret; > +} > + > static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) > { > drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > @@ -1293,6 +1410,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) > > VG_CLEAR(mmap_arg); > mmap_arg.handle = bo_gem->gem_handle; > + mmap_arg.flags = 0; > mmap_arg.offset = 0; > mmap_arg.size = bo->size; > ret = drmIoctl(bufmgr_gem->fd, > @@ -1553,6 +1671,12 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) > } > > drm_public int > +drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo) > +{ > + return drm_intel_gem_bo_unmap(bo); > +} > + > +drm_public int > drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) > { > return drm_intel_gem_bo_unmap(bo); > -- > 1.9.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx