On Fri, Nov 14, 2014 at 11:04 AM, Daniel Vetter <daniel@xxxxxxxx> wrote:
> On Thu, Nov 13, 2014 at 04:28:46PM +0100, Sedat Dilek wrote:
>> Hi,
>>
>> what is the status of the drm-intel-wc-mmap patchset (#2 + #3)?
>> I have refreshed them on top of the drm-intel-coherent-phys-gtt patch (#1).
>> Playing with that against Linux v3.18-rc4.
>
> Waiting for the missing testcases and remaining reviews. I tried to
> volunteer Akash for that, but apparently failed.

Hiho Daniel :-),

what can you recommend for testing gfx performance?
Chris made a remark about getting only 4M(ops) with x11perf.

What are your experiences with diverse compilers?
Here I use a prebuilt LLVM/Clang v3.4.2 toolchain from <llvm.org>; this
is my default compiler.
I also have gcc v4.9.2 in a /opt/gcc-4.9 environment.
The default in Ubuntu/precise is gcc v4.6.3.

Note: All my /opt/xorg stuff (currently: libdrm | mesa3d | intelddx) is
compiled with LLVM.

Does intel-gpu-tools include some sort of performance testing?

Any other hints are very much appreciated.

Thanks!

Regards,
- Sedat -

> -Daniel
>
>>
>> Regards,
>> - Sedat -
>
>> From 20a70ef5865104f35bc8e4cd11ca8ae3b7e6051a Mon Sep 17 00:00:00 2001
>> From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Date: Tue, 4 Nov 2014 04:51:40 -0800
>> Subject: [PATCH 1/3] drm/i915: Make the physical object coherent with GTT
>> MIME-Version: 1.0
>> Content-Type: text/plain; charset=UTF-8
>> Content-Transfer-Encoding: 8bit
>>
>> Currently, objects for which the hardware needs a contiguous physical
>> address are allocated a shadow backing storage to satisfy the constraint.
>> This shadow buffer is not wired into the normal obj->pages, and so the
>> physical object is incoherent with accesses via the GPU, GTT and CPU. By
>> setting up the appropriate scatter-gather table, we can allow userspace
>> to access the physical object either via a GTT mmapping or by rendering
>> into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
>> storage coherent with the contiguous shadow is not yet possible.
>> Fortuitously, CPU mmaps of objects requiring physical addresses are not
>> expected to be coherent anyway.
>>
>> This allows the physical constraint of the GEM object to be transparent
>> to userspace, allowing it to efficiently render into or update such
>> objects via the GTT and GPU.
>>
>> v2: Fix leak of pci handle spotted by Ville
>> v3: Remove the now duplicate call to detach_phys_object during free.
>> v4: Wait for rendering before pwrite. As this patch makes it possible to
>> render into the phys object, we should make it correct as well!
>>
>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
>> Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
>> Signed-off-by: Daniel Vetter <daniel.vetter@xxxxxxxx>
>> ---
>>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>>  drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++-------------
>>  include/uapi/drm/i915_drm.h     |   1 +
>>  4 files changed, 150 insertions(+), 67 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index 1403b01..9b08853 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -1027,6 +1027,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>      case I915_PARAM_CMD_PARSER_VERSION:
>>          value = i915_cmd_parser_get_version();
>>          break;
>> +    case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>> +        value = 1;
>> +        break;
>>      default:
>>          DRM_DEBUG("Unknown parameter %d\n", param->param);
>>          return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 16a6f6d..0417784 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1864,10 +1864,10 @@ struct drm_i915_gem_object {
>>      unsigned long user_pin_count;
>>      struct drm_file *pin_filp;
>>
>> -    /** for phy allocated objects */
>> -    struct drm_dma_handle *phys_handle;
>> -
>>      union {
>> +        /** for phy allocated objects */
>> +        struct drm_dma_handle *phys_handle;
>> +
>>          struct i915_gem_userptr {
>>              uintptr_t ptr;
>>              unsigned read_only :1;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 28f91df..124ec85 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -208,40 +208,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>>      return 0;
>>  }
>>
>> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
>> +static int
>> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>>  {
>> -    drm_dma_handle_t *phys = obj->phys_handle;
>> +    struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
>> +    char *vaddr = obj->phys_handle->vaddr;
>> +    struct sg_table *st;
>> +    struct scatterlist *sg;
>> +    int i;
>>
>> -    if (!phys)
>> -        return;
>> +    if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
>> +        return -EINVAL;
>> +
>> +    for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> +        struct page *page;
>> +        char *src;
>> +
>> +        page = shmem_read_mapping_page(mapping, i);
>> +        if (IS_ERR(page))
>> +            return PTR_ERR(page);
>> +
>> +        src = kmap_atomic(page);
>> +        memcpy(vaddr, src, PAGE_SIZE);
>> +        drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> +        kunmap_atomic(src);
>> +
>> +        page_cache_release(page);
>> +        vaddr += PAGE_SIZE;
>> +    }
>> +
>> +    i915_gem_chipset_flush(obj->base.dev);
>> +
>> +    st = kmalloc(sizeof(*st), GFP_KERNEL);
>> +    if (st == NULL)
>> +        return -ENOMEM;
>> +
>> +    if (sg_alloc_table(st, 1, GFP_KERNEL)) {
>> +        kfree(st);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    sg = st->sgl;
>> +    sg->offset = 0;
>> +    sg->length = obj->base.size;
>>
>> -    if (obj->madv == I915_MADV_WILLNEED) {
>> +    sg_dma_address(sg) = obj->phys_handle->busaddr;
>> +    sg_dma_len(sg) = obj->base.size;
>> +
>> +    obj->pages = st;
>> +    obj->has_dma_mapping = true;
>> +    return 0;
>> +}
>> +
>> +static void
>> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>> +{
>> +    int ret;
>> +
>> +    BUG_ON(obj->madv == __I915_MADV_PURGED);
>> +
>> +    ret = i915_gem_object_set_to_cpu_domain(obj, true);
>> +    if (ret) {
>> +        /* In the event of a disaster, abandon all caches and
>> +         * hope for the best.
>> +         */
>> +        WARN_ON(ret != -EIO);
>> +        obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>> +    }
>> +
>> +    if (obj->madv == I915_MADV_DONTNEED)
>> +        obj->dirty = 0;
>> +
>> +    if (obj->dirty) {
>>          struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
>> -        char *vaddr = phys->vaddr;
>> +        char *vaddr = obj->phys_handle->vaddr;
>>          int i;
>>
>>          for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> -            struct page *page = shmem_read_mapping_page(mapping, i);
>> -            if (!IS_ERR(page)) {
>> -                char *dst = kmap_atomic(page);
>> -                memcpy(dst, vaddr, PAGE_SIZE);
>> -                drm_clflush_virt_range(dst, PAGE_SIZE);
>> -                kunmap_atomic(dst);
>> -
>> -                set_page_dirty(page);
>> +            struct page *page;
>> +            char *dst;
>> +
>> +            page = shmem_read_mapping_page(mapping, i);
>> +            if (IS_ERR(page))
>> +                continue;
>> +
>> +            dst = kmap_atomic(page);
>> +            drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> +            memcpy(dst, vaddr, PAGE_SIZE);
>> +            kunmap_atomic(dst);
>> +
>> +            set_page_dirty(page);
>> +            if (obj->madv == I915_MADV_WILLNEED)
>>                  mark_page_accessed(page);
>> -                page_cache_release(page);
>> -            }
>> +            page_cache_release(page);
>>              vaddr += PAGE_SIZE;
>>          }
>> -        i915_gem_chipset_flush(obj->base.dev);
>> +        obj->dirty = 0;
>>      }
>>
>> -#ifdef CONFIG_X86
>> -    set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
>> -#endif
>> -    drm_pci_free(obj->base.dev, phys);
>> -    obj->phys_handle = NULL;
>> +    sg_free_table(obj->pages);
>> +    kfree(obj->pages);
>> +
>> +    obj->has_dma_mapping = false;
>> +}
>> +
>> +static void
>> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
>> +{
>> +    drm_pci_free(obj->base.dev, obj->phys_handle);
>> +}
>> +
>> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
>> +    .get_pages = i915_gem_object_get_pages_phys,
>> +    .put_pages = i915_gem_object_put_pages_phys,
>> +    .release = i915_gem_object_release_phys,
>> +};
>> +
>> +static int
>> +drop_pages(struct drm_i915_gem_object *obj)
>> +{
>> +    struct i915_vma *vma, *next;
>> +    int ret;
>> +
>> +    drm_gem_object_reference(&obj->base);
>> +    list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
>> +        if (i915_vma_unbind(vma))
>> +            break;
>> +
>> +    ret = i915_gem_object_put_pages(obj);
>> +    drm_gem_object_unreference(&obj->base);
>> +
>> +    return ret;
>>  }
>>
>>  int
>> @@ -249,9 +346,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>>              int align)
>>  {
>>      drm_dma_handle_t *phys;
>> -    struct address_space *mapping;
>> -    char *vaddr;
>> -    int i;
>> +    int ret;
>>
>>      if (obj->phys_handle) {
>>          if ((unsigned long)obj->phys_handle->vaddr & (align -1))
>> @@ -266,41 +361,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>>      if (obj->base.filp == NULL)
>>          return -EINVAL;
>>
>> +    ret = drop_pages(obj);
>> +    if (ret)
>> +        return ret;
>> +
>>      /* create a new object */
>>      phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>>      if (!phys)
>>          return -ENOMEM;
>>
>> -    vaddr = phys->vaddr;
>> -#ifdef CONFIG_X86
>> -    set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
>> -#endif
>> -    mapping = file_inode(obj->base.filp)->i_mapping;
>> -    for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> -        struct page *page;
>> -        char *src;
>> -
>> -        page = shmem_read_mapping_page(mapping, i);
>> -        if (IS_ERR(page)) {
>> -#ifdef CONFIG_X86
>> -            set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
>> -#endif
>> -            drm_pci_free(obj->base.dev, phys);
>> -            return PTR_ERR(page);
>> -        }
>> -
>> -        src = kmap_atomic(page);
>> -        memcpy(vaddr, src, PAGE_SIZE);
>> -        kunmap_atomic(src);
>> -
>> -        mark_page_accessed(page);
>> -        page_cache_release(page);
>> -
>> -        vaddr += PAGE_SIZE;
>> -    }
>> -
>>      obj->phys_handle = phys;
>> -    return 0;
>> +    obj->ops = &i915_gem_phys_ops;
>> +
>> +    return i915_gem_object_get_pages(obj);
>>  }
>>
>>  static int
>> @@ -311,6 +384,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>>      struct drm_device *dev = obj->base.dev;
>>      void *vaddr = obj->phys_handle->vaddr + args->offset;
>>      char __user *user_data = to_user_ptr(args->data_ptr);
>> +    int ret;
>> +
>> +    /* We manually control the domain here and pretend that it
>> +     * remains coherent i.e. in the GTT domain, like shmem_pwrite.
>> +     */
>> +    ret = i915_gem_object_wait_rendering(obj, false);
>> +    if (ret)
>> +        return ret;
>>
>>      if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
>>          unsigned long unwritten;
>> @@ -326,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>>          return -EFAULT;
>>      }
>>
>> +    drm_clflush_virt_range(vaddr, args->size);
>>      i915_gem_chipset_flush(dev);
>>      return 0;
>>  }
>> @@ -1046,11 +1128,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>>       * pread/pwrite currently are reading and writing from the CPU
>>       * perspective, requiring manual detiling by the client.
>>       */
>> -    if (obj->phys_handle) {
>> -        ret = i915_gem_phys_pwrite(obj, args, file);
>> -        goto out;
>> -    }
>> -
>>      if (obj->tiling_mode == I915_TILING_NONE &&
>>          obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
>>          cpu_write_needs_clflush(obj)) {
>> @@ -1060,8 +1137,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>>           * textures). Fallback to the shmem path in that case. */
>>      }
>>
>> -    if (ret == -EFAULT || ret == -ENOSPC)
>> -        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
>> +    if (ret == -EFAULT || ret == -ENOSPC) {
>> +        if (obj->phys_handle)
>> +            ret = i915_gem_phys_pwrite(obj, args, file);
>> +        else
>> +            ret = i915_gem_shmem_pwrite(dev, obj, args, file);
>> +    }
>>
>>  out:
>>      drm_gem_object_unreference(&obj->base);
>> @@ -3560,7 +3641,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>>       * Stolen memory is always coherent with the GPU as it is explicitly
>>       * marked as wc by the system, or the system is cache-coherent.
>>       */
>> -    if (obj->stolen)
>> +    if (obj->stolen || obj->phys_handle)
>>          return false;
>>
>>      /* If the GPU is snooping the contents of the CPU cache,
>> @@ -4495,8 +4576,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>>          }
>>      }
>>
>> -    i915_gem_object_detach_phys(obj);
>> -
>>      /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>>       * before progressing. */
>>      if (obj->stolen)
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index ff57f07..c6b229f 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
>>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>>  #define I915_PARAM_HAS_WT                27
>>  #define I915_PARAM_CMD_PARSER_VERSION    28
>> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>>
>>  typedef struct drm_i915_getparam {
>>      int param;
>> --
>> 2.1.3
>>
>
>> From ae9a40e0d04464796cc782d4531f386398e5266a Mon Sep 17 00:00:00 2001
>> From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Date: Mon, 13 Oct 2014 14:08:10 +0100
>> Subject: [PATCH 2/3] drm/i915: Broaden application of set-domain(GTT)
>>
>> Previously, this was restricted to only operate on bound objects - to
>> make pointer access through the GTT to the object coherent with writes
>> to and from the GPU. A second use case is drm_intel_bo_wait_rendering(),
>> which at present does not function unless the object also happens to
>> be bound into the GGTT (on current systems that is becoming increasingly
>> rare, especially for the typical requests from mesa). A third use case is
>> a future patch wishing to extend the coverage of the GTT domain to
>> include objects not bound into the GGTT but still in its coherent cache
>> domain. For the latter pair of requests, we need to operate on the
>> object regardless of its bind state.
>>
>> v2: After discussion with Akash, we came to the conclusion that the
>> get-pages was required for accurate domain tracking in the corner cases
>> (like the shrinker) and also useful for ensuring memory coherency with
>> earlier cached CPU mmaps in case userspace uses exotic cache bypass
>> (non-temporal) instructions.
>>
>> Cc: Akash Goel <akash.goel@xxxxxxxxx>
>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>>
>> Conflicts:
>>      drivers/gpu/drm/i915/i915_gem.c
>> ---
>>  drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++++------------------
>>  1 file changed, 21 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 124ec85..fe119e1 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1490,18 +1490,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>>      if (ret)
>>          goto unref;
>>
>> -    if (read_domains & I915_GEM_DOMAIN_GTT) {
>> +    if (read_domains & I915_GEM_DOMAIN_GTT)
>>          ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
>> -
>> -        /* Silently promote "you're not bound, there was nothing to do"
>> -         * to success, since the client was just asking us to
>> -         * make sure everything was done.
>> -         */
>> -        if (ret == -EINVAL)
>> -            ret = 0;
>> -    } else {
>> +    else
>>          ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
>> -    }
>>
>>  unref:
>>      drm_gem_object_unreference(&obj->base);
>> @@ -3722,15 +3714,10 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
>>  int
>>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>>  {
>> -    struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
>> -    struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>>      uint32_t old_write_domain, old_read_domains;
>> +    struct i915_vma *vma;
>>      int ret;
>>
>> -    /* Not valid to be called on unbound objects. */
>> -    if (vma == NULL)
>> -        return -EINVAL;
>> -
>>      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
>>          return 0;
>>
>> @@ -3739,6 +3726,19 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>>          return ret;
>>
>>      i915_gem_object_retire(obj);
>> +
>> +    /* Flush and acquire obj->pages so that we are coherent through
>> +     * direct access in memory with previous cached writes through
>> +     * shmemfs and that our cache domain tracking remains valid.
>> +     * For example, if the obj->filp was moved to swap without us
>> +     * being notified and releasing the pages, we would mistakenly
>> +     * continue to assume that the obj remained out of the CPU cached
>> +     * domain.
>> +     */
>> +    ret = i915_gem_object_get_pages(obj);
>> +    if (ret)
>> +        return ret;
>> +
>>      i915_gem_object_flush_cpu_write_domain(obj, false);
>>
>>      /* Serialise direct access to this object with the barriers for
>> @@ -3770,9 +3770,12 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>>                          old_write_domain);
>>
>>      /* And bump the LRU for this access */
>> -    if (i915_gem_object_is_inactive(obj))
>> +    vma = i915_gem_obj_to_ggtt(obj);
>> +    if (vma &&
>> +        drm_mm_node_allocated(&vma->node) &&
>> +        i915_gem_object_is_inactive(obj))
>>          list_move_tail(&vma->mm_list,
>> -                   &dev_priv->gtt.base.inactive_list);
>> +                   &to_i915(obj->base.dev)->gtt.base.inactive_list);
>>
>>      return 0;
>>  }
>> --
>> 2.1.3
>>
>
>> From 4882abcb71b4982371a1aad038d7565887138ee5 Mon Sep 17 00:00:00 2001
>> From: Akash Goel <akash.goel@xxxxxxxxx>
>> Date: Thu, 23 Oct 2014 17:55:47 +0100
>> Subject: [PATCH 3/3] drm/i915: Support creation of unbound wc user mappings
>>  for objects
>>
>> This patch provides support for creating write-combining virtual mappings
>> of a GEM object. It provides the same functionality as the 'mmap_gtt'
>> interface without the constraints and contention of a limited aperture
>> space, but requires clients to handle the linear-to-tile conversion on
>> their own. This improves CPU write performance: with such a mapping,
>> writes and reads are almost 50% faster than with mmap_gtt. Like the GTT
>> mmapping, and unlike a regular CPU mmapping, it avoids the cache flush
>> after an update from the CPU side when the object is passed to the GPU.
>> This type of mapping is especially useful for sub-region updates,
>> i.e. when only a portion of the object is to be updated. Using a CPU mmap
>> in such cases would normally incur a clflush of the whole object, and
>> using a GTT mmapping would likely require eviction of an active object or
>> fence and thus stall. The write-combining CPU mmap avoids both.
>>
>> To ensure cache coherency, the GTT domain has been reused here: setting
>> it before using this mapping provides the required cache flush if the
>> object is in the CPU domain, as well as synchronization against
>> concurrent rendering. Although access through an uncached mmap should
>> automatically invalidate the cache lines, this may not be true for
>> non-temporal write instructions, and not all pages of the object may be
>> updated at any given point of time through this mapping. Having a call to
>> get_pages in the set_to_gtt_domain function, as added in the earlier patch
>> 'drm/i915: Broaden application of set-domain(GTT)', guarantees the clflush,
>> so there will be no cachelines holding stale data for the object before it
>> is accessed through this map.
>>
>> The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP ioctl) has been
>> extended with a new flags field (defaulting to 0 for existing users). In
>> order for userspace to detect the extended ioctl, a new parameter
>> I915_PARAM_MMAP_VERSION has been added for versioning the ioctl interface.
>>
>> v2: Fix error handling, invalid flag detection, renaming (ickle)
>> v3: Adapt to fit "drm/i915: Make the physical object coherent with GTT" (dileks)
>>
>> The new mmapping is exercised by igt/gem_mmap_wc,
>> igt/gem_concurrent_blit and igt/gem_gtt_speed.
>>
>> Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a
>> Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx>
>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
>> ---
>>  drivers/gpu/drm/i915/i915_dma.c |  3 +++
>>  drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++++++++++++
>>  include/uapi/drm/i915_drm.h     |  9 +++++++++
>>  3 files changed, 31 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index 9b08853..c034cfc 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -1030,6 +1030,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>      case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>>          value = 1;
>>          break;
>> +    case I915_PARAM_MMAP_VERSION:
>> +        value = 1;
>> +        break;
>>      default:
>>          DRM_DEBUG("Unknown parameter %d\n", param->param);
>>          return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index fe119e1..48d0bbc 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1548,6 +1548,12 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>>      struct drm_gem_object *obj;
>>      unsigned long addr;
>>
>> +    if (args->flags & ~(I915_MMAP_WC))
>> +        return -EINVAL;
>> +
>> +    if (args->flags & I915_MMAP_WC && !cpu_has_pat)
>> +        return -ENODEV;
>> +
>>      obj = drm_gem_object_lookup(dev, file, args->handle);
>>      if (obj == NULL)
>>          return -ENOENT;
>> @@ -1563,6 +1569,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>>      addr = vm_mmap(obj->filp, 0, args->size,
>>                 PROT_READ | PROT_WRITE, MAP_SHARED,
>>                 args->offset);
>> +    if (args->flags & I915_MMAP_WC) {
>> +        struct mm_struct *mm = current->mm;
>> +        struct vm_area_struct *vma;
>> +
>> +        down_write(&mm->mmap_sem);
>> +        vma = find_vma(mm, addr);
>> +        if (vma)
>> +            vma->vm_page_prot =
>> +                pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
>> +        else
>> +            addr = -ENOMEM;
>> +        up_write(&mm->mmap_sem);
>> +    }
>>      drm_gem_object_unreference_unlocked(obj);
>>      if (IS_ERR((void *)addr))
>>          return addr;
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index c6b229f..f7eaefa 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -341,6 +341,7 @@ typedef struct drm_i915_irq_wait {
>>  #define I915_PARAM_HAS_WT                27
>>  #define I915_PARAM_CMD_PARSER_VERSION    28
>>  #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>> +#define I915_PARAM_MMAP_VERSION          30
>>
>>  typedef struct drm_i915_getparam {
>>      int param;
>> @@ -488,6 +489,14 @@ struct drm_i915_gem_mmap {
>>       * This is a fixed-size type for 32/64 compatibility.
>>       */
>>      __u64 addr_ptr;
>> +
>> +    /**
>> +     * Flags for extended behaviour.
>> +     *
>> +     * Added in version 2.
>> +     */
>> +    __u64 flags;
>> +#define I915_MMAP_WC 0x1
>>  };
>>
>>  struct drm_i915_gem_mmap_gtt {
>> --
>> 2.1.3
>>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
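[Editor's note: for readers who want to poke at the new interface from
userspace before the igt tests land, below is a minimal, untested sketch
against libdrm. It assumes an i915_drm.h refreshed with the quoted
patches; the fallback defines mirror the values from the patches, and the
helper names (has_wc_mmap, wc_mmap) are made up for illustration, not
part of any library.]

/* Detect the extended mmap ioctl via I915_PARAM_MMAP_VERSION and, if
 * available, map a GEM object write-combining with the new I915_MMAP_WC
 * flag from patch 3.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

#ifndef I915_PARAM_MMAP_VERSION
#define I915_PARAM_MMAP_VERSION 30      /* value from patch 3 */
#endif
#ifndef I915_MMAP_WC
#define I915_MMAP_WC 0x1                /* value from patch 3 */
#endif

static int has_wc_mmap(int fd)
{
        drm_i915_getparam_t gp;
        int value = 0;

        memset(&gp, 0, sizeof(gp));
        gp.param = I915_PARAM_MMAP_VERSION;
        gp.value = &value;

        /* Kernels without the patch reject the unknown parameter. */
        if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                return 0;

        return value >= 1;
}

static void *wc_mmap(int fd, uint32_t handle, uint64_t size)
{
        struct drm_i915_gem_mmap mmap_arg;
        struct drm_i915_gem_set_domain set_domain;

        /* Relies on the flags field added to drm_i915_gem_mmap by
         * patch 3; with an older installed header, the extended struct
         * must be copied in locally. */
        memset(&mmap_arg, 0, sizeof(mmap_arg));
        mmap_arg.handle = handle;
        mmap_arg.offset = 0;
        mmap_arg.size = size;
        mmap_arg.flags = I915_MMAP_WC;
        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
                return NULL;

        /* Per patch 2, set-domain(GTT) now also works on unbound
         * objects: it flushes stale CPU cachelines and serialises
         * against the GPU before we write through the WC mapping. */
        memset(&set_domain, 0, sizeof(set_domain));
        set_domain.handle = handle;
        set_domain.read_domains = I915_GEM_DOMAIN_GTT;
        set_domain.write_domain = I915_GEM_DOMAIN_GTT;
        drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);

        return (void *)(uintptr_t)mmap_arg.addr_ptr;
}

/* Usage: gate the fast path on has_wc_mmap(fd); the pointer returned by
 * wc_mmap() can then be written with normal stores (plus a store fence
 * after non-temporal writes), which is roughly what igt/gem_mmap_wc is
 * expected to exercise. */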