On Sat, Oct 25, 2014 at 05:21:53PM +0530, akash.goel@xxxxxxxxx wrote: > From: Akash Goel <akash.goel@xxxxxxxxx> > > A new libdrm interface 'drm_intel_gem_bo_map_wc' is provided by this > patch. Through this interface Gfx clients can create write combining > virtual mappings of the Gem object. It will provide the same functionality > of 'mmap_gtt' interface without the constraints of limited aperture space, > but provided clients handle the linear to tile conversion on their own. > This patch is intended for improving the CPU write operation performance, > as with such mapping, writes are almost 50% faster than with mmap_gtt. > Also it avoids the Cache flush after update from CPU side, when object is > passed on to GPU, which will be the case if regular mmap interface is used. > This type of mapping is especially useful in case of sub-region > update, i.e. when only a portion of the object is to be updated. > Also there is support for the unsynchronized version of this interface > named 'drm_intel_gem_bo_map_wc_unsynchronized', through which wc virtual > mappings, but unsynchronized ones, can be created of the Gem object. > To ensure the cache coherency, before using this mapping, the GTT domain has > been reused here. This provides the required Cache flush if the object is in > CPU domain, or synchronization against the concurrent rendering. > > The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been > extended with a new flags field (defaulting to 0 for existing users). In > order for userspace to detect the extended ioctl, a new parameter > I915_PARAM_HAS_EXT_MMAP has been added for versioning the ioctl interface. 
> > v2: Aligned with the v2 of the corresponding kernel patch (Chris) > > Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >From a quick glance at the patch: It looks like you copy/pasted the map() implementation and changed a few things here and there instead of adding a flag to drm_intel_gem_bo_map() and reusing the current code. Can we expose another version of map that takes flags (_WC, _UNSYNCHRONIZED, ...) instead of starting to have every single combination possible? Do we really need a separate mem_wc_virtual? Using _map() or _map_wc() in an exclusive way makes some sense to me. With the introduction of mem_wc_virtual, you left aside the vma cache handling and so we'll never end up unmapping it, drm_intel_gem_bo_free() doesn't unmap it either, ... I'd just expect users to use _unmap(). -- Damien > --- > include/drm/i915_drm.h | 9 ++++ > intel/intel_bufmgr.h | 3 ++ > intel/intel_bufmgr_gem.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 136 insertions(+) > > diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h > index 15dd01d..a91a1d0 100644 > --- a/include/drm/i915_drm.h > +++ b/include/drm/i915_drm.h > @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 > #define I915_PARAM_HAS_WT 27 > #define I915_PARAM_CMD_PARSER_VERSION 28 > +#define I915_PARAM_MMAP_VERSION 29 > > typedef struct drm_i915_getparam { > int param; > @@ -487,6 +488,14 @@ struct drm_i915_gem_mmap { > * This is a fixed-size type for 32/64 compatibility. > */ > __u64 addr_ptr; > + > + /** > + * Flags for extended behaviour. > + * > + * Added in version 2. 
> + */ > + __u64 flags; > +#define I915_MMAP_WC 0x1 > }; > > struct drm_i915_gem_mmap_gtt { > diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h > index be83a56..bda4115 100644 > --- a/intel/intel_bufmgr.h > +++ b/intel/intel_bufmgr.h > @@ -177,6 +177,9 @@ void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, > int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); > int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); > int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); > +int drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo); > +int drm_intel_gem_bo_map_wc(drm_intel_bo *bo); > +int drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo); > > int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); > void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c > index f2f4fea..95c588f 100644 > --- a/intel/intel_bufmgr_gem.c > +++ b/intel/intel_bufmgr_gem.c > @@ -184,6 +184,8 @@ struct _drm_intel_bo_gem { > int reloc_count; > /** Mapped address for the buffer, saved across map/unmap cycles */ > void *mem_virtual; > + /** Uncached Mapped address for the buffer, saved across map/unmap cycles */ > + void *mem_wc_virtual; > /** GTT virtual address for the buffer, saved across map/unmap cycles */ > void *gtt_virtual; > /** > @@ -1267,6 +1269,121 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) > } > } > > +static int > +map_wc(drm_intel_bo *bo) > +{ > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + int ret; > + > + if (bo_gem->is_userptr) > + return -EINVAL; > + > + if (bo_gem->map_count++ == 0) > + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); > + > + /* Get a mapping of the buffer if we haven't before. 
*/ > + if (bo_gem->mem_wc_virtual == NULL) { > + struct drm_i915_gem_mmap mmap_arg; > + > + DBG("bo_map_wc: mmap %d (%s), map_count=%d\n", > + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); > + > + VG_CLEAR(mmap_arg); > + mmap_arg.handle = bo_gem->gem_handle; > + /* To indicate the uncached virtual mapping to KMD */ > + mmap_arg.flags = I915_MMAP_WC; > + mmap_arg.offset = 0; > + mmap_arg.size = bo->size; > + ret = drmIoctl(bufmgr_gem->fd, > + DRM_IOCTL_I915_GEM_MMAP, > + &mmap_arg); > + if (ret != 0) { > + ret = -errno; > + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", > + __FILE__, __LINE__, bo_gem->gem_handle, > + bo_gem->name, strerror(errno)); > + if (--bo_gem->map_count == 0) > + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); > + pthread_mutex_unlock(&bufmgr_gem->lock); > + return ret; > + } > + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); > + bo_gem->mem_wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; > + } > + > + bo->virtual = bo_gem->mem_wc_virtual; > + > + DBG("bo_map_wc: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, > + bo_gem->mem_wc_virtual); > + > + return 0; > +} > + > +/* To be used in a similar way to mmap_gtt */ > +drm_public int > +drm_intel_gem_bo_map_wc(drm_intel_bo *bo) { > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + struct drm_i915_gem_set_domain set_domain; > + int ret; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + > + ret = map_wc(bo); > + if (ret) { > + pthread_mutex_unlock(&bufmgr_gem->lock); > + return ret; > + } > + > + /* Now move it to the GTT domain so that the GPU and CPU > + * caches are flushed and the GPU isn't actively using the > + * buffer. > + * > + * The domain change is done even for the objects which > + * are not bounded. For them first the pages are acquired, > + * before the domain change. 
> + */ > + VG_CLEAR(set_domain); > + set_domain.handle = bo_gem->gem_handle; > + set_domain.read_domains = I915_GEM_DOMAIN_GTT; > + set_domain.write_domain = I915_GEM_DOMAIN_GTT; > + ret = drmIoctl(bufmgr_gem->fd, > + DRM_IOCTL_I915_GEM_SET_DOMAIN, > + &set_domain); > + if (ret != 0) { > + DBG("%s:%d: Error setting domain %d: %s\n", > + __FILE__, __LINE__, bo_gem->gem_handle, > + strerror(errno)); > + } > + drm_intel_gem_bo_mark_mmaps_incoherent(bo); > + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); > + pthread_mutex_unlock(&bufmgr_gem->lock); > + > + return 0; > +} > + > +drm_public int > +drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo) { > + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > +#ifdef HAVE_VALGRIND > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > +#endif > + int ret; > + > + pthread_mutex_lock(&bufmgr_gem->lock); > + > + ret = map_wc(bo); > + if (ret == 0) { > + drm_intel_gem_bo_mark_mmaps_incoherent(bo); > + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); > + } > + > + pthread_mutex_unlock(&bufmgr_gem->lock); > + > + return ret; > +} > + > static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) > { > drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; > @@ -1293,6 +1410,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) > > VG_CLEAR(mmap_arg); > mmap_arg.handle = bo_gem->gem_handle; > + mmap_arg.flags = 0; > mmap_arg.offset = 0; > mmap_arg.size = bo->size; > ret = drmIoctl(bufmgr_gem->fd, > @@ -1553,6 +1671,12 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) > } > > drm_public int > +drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo) > +{ > + return drm_intel_gem_bo_unmap(bo); > +} > + > +drm_public int > drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) > { > return drm_intel_gem_bo_unmap(bo); > -- > 1.9.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx