On Fri, Nov 14, 2014 at 11:04 AM, Daniel Vetter <daniel@xxxxxxxx> wrote:
> On Thu, Nov 13, 2014 at 04:28:46PM +0100, Sedat Dilek wrote:
>> Hi,
>>
>> what is the status of the drm-intel-wc-mmap patchset (#2 + #3)?
>> I have refreshed them on top of the drm-intel-coherent-phys-gtt patch (#1).
>> Playing with that against Linux v3.18-rc4.
>
> Waiting for the missing testcases and remaining reviews. I tried to
> volunteer Akash for that, but apparently failed.

Hiho Daniel :-),

what can you recommend for testing gfx performance?
Chris made a remark about getting only 4M(ops) with x11perf.

What are your experiences with diverse compilers?
Here I use a prebuilt LLVM/Clang v3.4.2 toolchain from <llvm.org>; this
is my default compiler.
I also have gcc v4.9.2 in a /opt/gcc-4.9 environment.
The default in Ubuntu/precise is gcc v4.6.3.

Note: All my /opt/xorg stuff (currently: libdrm | mesa3d | intelddx) is
compiled with LLVM.

Does intel-gpu-tools include some sort of performance testing?

Any other hints are very much appreciated.

Thanks!

Regards,
- Sedat -

> -Daniel
>
>>
>> Regards,
>> - Sedat -
>
>> From 20a70ef5865104f35bc8e4cd11ca8ae3b7e6051a Mon Sep 17 00:00:00 2001
>> From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Date: Tue, 4 Nov 2014 04:51:40 -0800
>> Subject: [PATCH 1/3] drm/i915: Make the physical object coherent with GTT
>> MIME-Version: 1.0
>> Content-Type: text/plain; charset=UTF-8
>> Content-Transfer-Encoding: 8bit
>>
>> Currently, objects for which the hardware needs a contiguous physical
>> address are allocated a shadow backing storage to satisfy the constraint.
>> This shadow buffer is not wired into the normal obj->pages, and so the
>> physical object is incoherent with accesses via the GPU, GTT and CPU. By
>> setting up the appropriate scatter-gather table, we can allow userspace
>> to access the physical object either via a GTT mmapping or by rendering
>> into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
>> storage coherent with the contiguous shadow is not yet possible.
>> Fortuitously, CPU mmaps of objects requiring physical addresses are not
>> expected to be coherent anyway.
>>
>> This allows the physical constraint of the GEM object to be transparent
>> to userspace, allowing it to efficiently render into or update such
>> objects via the GTT and GPU.
>>
>> v2: Fix leak of pci handle spotted by Ville
>> v3: Remove the now duplicate call to detach_phys_object during free.
>> v4: Wait for rendering before pwrite. As this patch makes it possible to
>> render into the phys object, we should make it correct as well!
>>
>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
>> Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
>> Signed-off-by: Daniel Vetter <daniel.vetter@xxxxxxxx>
>> ---
>>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>>  drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++-------------
>>  include/uapi/drm/i915_drm.h     |   1 +
>>  4 files changed, 150 insertions(+), 67 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index 1403b01..9b08853 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -1027,6 +1027,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>      case I915_PARAM_CMD_PARSER_VERSION:
>>          value = i915_cmd_parser_get_version();
>>          break;
>> +    case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>> +        value = 1;
>> +        break;
>>      default:
>>          DRM_DEBUG("Unknown parameter %d\n", param->param);
>>          return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 16a6f6d..0417784 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1864,10 +1864,10 @@ struct drm_i915_gem_object {
>>      unsigned long user_pin_count;
>>      struct drm_file *pin_filp;
>>
>> -    /** for phy allocated objects */
>> -    struct drm_dma_handle *phys_handle;
>> -
>>      union {
>> +        /** for phy allocated objects */
>> +        struct drm_dma_handle *phys_handle;
>> +
>>          struct i915_gem_userptr {
>>              uintptr_t ptr;
>>              unsigned read_only :1;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 28f91df..124ec85 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -208,40 +208,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>>      return 0;
>>  }
>>
>> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
>> +static int
>> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>>  {
>> -    drm_dma_handle_t *phys = obj->phys_handle;
>> +    struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
>> +    char *vaddr = obj->phys_handle->vaddr;
>> +    struct sg_table *st;
>> +    struct scatterlist *sg;
>> +    int i;
>>
>> -    if (!phys)
>> -        return;
>> +    if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
>> +        return -EINVAL;
>> +
>> +    for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> +        struct page *page;
>> +        char *src;
>> +
>> +        page = shmem_read_mapping_page(mapping, i);
>> +        if (IS_ERR(page))
>> +            return PTR_ERR(page);
>> +
>> +        src = kmap_atomic(page);
>> +        memcpy(vaddr, src, PAGE_SIZE);
>> +        drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> +        kunmap_atomic(src);
>> +
>> +        page_cache_release(page);
>> +        vaddr += PAGE_SIZE;
>> +    }
>> +
>> +    i915_gem_chipset_flush(obj->base.dev);
>> +
>> +    st = kmalloc(sizeof(*st), GFP_KERNEL);
>> +    if (st == NULL)
>> +        return -ENOMEM;
>> +
>> +    if (sg_alloc_table(st, 1, GFP_KERNEL)) {
>> +        kfree(st);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    sg = st->sgl;
>> +    sg->offset = 0;
>> +    sg->length = obj->base.size;
>>
>> -    if (obj->madv == I915_MADV_WILLNEED) {
>> +    sg_dma_address(sg) = obj->phys_handle->busaddr;
>> +    sg_dma_len(sg) = obj->base.size;
>> +
>> +    obj->pages = st;
>> +    obj->has_dma_mapping = true;
>> +    return 0;
>> +}
>> +
>> +static void
>> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>> +{
>> +    int ret;
>> +
>> +    BUG_ON(obj->madv == __I915_MADV_PURGED);
>> +
>> +    ret = i915_gem_object_set_to_cpu_domain(obj, true);
>> +    if (ret) {
>> +        /* In the event of a disaster, abandon all caches and
>> +         * hope for the best.
>> +         */
>> +        WARN_ON(ret != -EIO);
>> +        obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>> +    }
>> +
>> +    if (obj->madv == I915_MADV_DONTNEED)
>> +        obj->dirty = 0;
>> +
>> +    if (obj->dirty) {
>>          struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
>> -        char *vaddr = phys->vaddr;
>> +        char *vaddr = obj->phys_handle->vaddr;
>>          int i;
>>
>>          for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> -            struct page *page = shmem_read_mapping_page(mapping, i);
>> -            if (!IS_ERR(page)) {
>> -                char *dst = kmap_atomic(page);
>> -                memcpy(dst, vaddr, PAGE_SIZE);
>> -                drm_clflush_virt_range(dst, PAGE_SIZE);
>> -                kunmap_atomic(dst);
>> -
>> -                set_page_dirty(page);
>> +            struct page *page;
>> +            char *dst;
>> +
>> +            page = shmem_read_mapping_page(mapping, i);
>> +            if (IS_ERR(page))
>> +                continue;
>> +
>> +            dst = kmap_atomic(page);
>> +            drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> +            memcpy(dst, vaddr, PAGE_SIZE);
>> +            kunmap_atomic(dst);
>> +
>> +            set_page_dirty(page);
>> +            if (obj->madv == I915_MADV_WILLNEED)
>>                  mark_page_accessed(page);
>> -                page_cache_release(page);
>> -            }
>> +            page_cache_release(page);
>>              vaddr += PAGE_SIZE;
>>          }
>> -        i915_gem_chipset_flush(obj->base.dev);
>> +        obj->dirty = 0;
>>      }
>>
>> -#ifdef CONFIG_X86
>> -    set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
>> -#endif
>> -    drm_pci_free(obj->base.dev, phys);
>> -    obj->phys_handle = NULL;
>> +    sg_free_table(obj->pages);
>> +    kfree(obj->pages);
>> +
>> +    obj->has_dma_mapping = false;
>> +}
>> +
>> +static void
>> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
>> +{
>> +    drm_pci_free(obj->base.dev, obj->phys_handle);
>> +}
>> +
>> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
>> +    .get_pages = i915_gem_object_get_pages_phys,
>> +    .put_pages = i915_gem_object_put_pages_phys,
>> +    .release = i915_gem_object_release_phys,
>> +};
>> +
>> +static int
>> +drop_pages(struct drm_i915_gem_object *obj)
>> +{
>> +    struct i915_vma *vma, *next;
>> +    int ret;
>> +
>> +    drm_gem_object_reference(&obj->base);
>> +    list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
>> +        if (i915_vma_unbind(vma))
>> +            break;
>> +
>> +    ret = i915_gem_object_put_pages(obj);
>> +    drm_gem_object_unreference(&obj->base);
>> +
>> +    return ret;
>>  }
>>
>>  int
>> @@ -249,9 +346,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>>              int align)
>>  {
>>      drm_dma_handle_t *phys;
>> -    struct address_space *mapping;
>> -    char *vaddr;
>> -    int i;
>> +    int ret;
>>
>>      if (obj->phys_handle) {
>>          if ((unsigned long)obj->phys_handle->vaddr & (align -1))
>> @@ -266,41 +361,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>>      if (obj->base.filp == NULL)
>>          return -EINVAL;
>>
>> +    ret = drop_pages(obj);
>> +    if (ret)
>> +        return ret;
>> +
>>      /* create a new object */
>>      phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>>      if (!phys)
>>          return -ENOMEM;
>>
>> -    vaddr = phys->vaddr;
>> -#ifdef CONFIG_X86
>> -    set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
>> -#endif
>> -    mapping = file_inode(obj->base.filp)->i_mapping;
>> -    for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> -        struct page *page;
>> -        char *src;
>> -
>> -        page = shmem_read_mapping_page(mapping, i);
>> -        if (IS_ERR(page)) {
>> -#ifdef CONFIG_X86
>> -            set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
>> -#endif
>> -            drm_pci_free(obj->base.dev, phys);
>> -            return PTR_ERR(page);
>> -        }
>> -
>> -        src = kmap_atomic(page);
>> -        memcpy(vaddr, src, PAGE_SIZE);
>> -        kunmap_atomic(src);
>> -
>> -        mark_page_accessed(page);
>> -        page_cache_release(page);
>> -
>> -        vaddr += PAGE_SIZE;
>> -    }
>> -
>>      obj->phys_handle = phys;
>> -    return 0;
>> +    obj->ops = &i915_gem_phys_ops;
>> +
>> +    return i915_gem_object_get_pages(obj);
>>  }
>>
>>  static int
>> @@ -311,6 +384,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>>      struct drm_device *dev = obj->base.dev;
>>      void *vaddr = obj->phys_handle->vaddr + args->offset;
>>      char __user *user_data = to_user_ptr(args->data_ptr);
>> +    int ret;
>> +
>> +    /* We manually control the domain here and pretend that it
>> +     * remains coherent i.e. in the GTT domain, like shmem_pwrite.
>> +     */
>> +    ret = i915_gem_object_wait_rendering(obj, false);
>> +    if (ret)
>> +        return ret;
>>
>>      if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
>>          unsigned long unwritten;
>> @@ -326,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>>          return -EFAULT;
>>      }
>>
>> +    drm_clflush_virt_range(vaddr, args->size);
>>      i915_gem_chipset_flush(dev);
>>      return 0;
>>  }
>> @@ -1046,11 +1128,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>>       * pread/pwrite currently are reading and writing from the CPU
>>       * perspective, requiring manual detiling by the client.
>>       */
>> -    if (obj->phys_handle) {
>> -        ret = i915_gem_phys_pwrite(obj, args, file);
>> -        goto out;
>> -    }
>> -
>>      if (obj->tiling_mode == I915_TILING_NONE &&
>>          obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
>>          cpu_write_needs_clflush(obj)) {
>> @@ -1060,8 +1137,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>>           * textures). Fallback to the shmem path in that case. */
>>      }
>>
>> -    if (ret == -EFAULT || ret == -ENOSPC)
>> -        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
>> +    if (ret == -EFAULT || ret == -ENOSPC) {
>> +        if (obj->phys_handle)
>> +            ret = i915_gem_phys_pwrite(obj, args, file);
>> +        else
>> +            ret = i915_gem_shmem_pwrite(dev, obj, args, file);
>> +    }
>>
>>  out:
>>      drm_gem_object_unreference(&obj->base);
>> @@ -3560,7 +3641,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>>       * Stolen memory is always coherent with the GPU as it is explicitly
>>       * marked as wc by the system, or the system is cache-coherent.
>>       */
>> -    if (obj->stolen)
>> +    if (obj->stolen || obj->phys_handle)
>>          return false;
>>
>>      /* If the GPU is snooping the contents of the CPU cache,
>> @@ -4495,8 +4576,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>>          }
>>      }
>>
>> -    i915_gem_object_detach_phys(obj);
>> -
>>      /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>>       * before progressing. */
>>      if (obj->stolen)
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index ff57f07..c6b229f 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
>>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>>  #define I915_PARAM_HAS_WT                27
>>  #define I915_PARAM_CMD_PARSER_VERSION    28
>> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>>
>>  typedef struct drm_i915_getparam {
>>      int param;
>> --
>> 2.1.3
>>
>
>> From ae9a40e0d04464796cc782d4531f386398e5266a Mon Sep 17 00:00:00 2001
>> From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Date: Mon, 13 Oct 2014 14:08:10 +0100
>> Subject: [PATCH 2/3] drm/i915: Broaden application of set-domain(GTT)
>>
>> Previously, this was restricted to only operate on bound objects - to
>> make pointer access through the GTT to the object coherent with writes
>> to and from the GPU. A second use case is drm_intel_bo_wait_rendering(),
>> which at present does not function unless the object also happens to
>> be bound into the GGTT (on current systems that is becoming increasingly
>> rare, especially for the typical requests from mesa). A third use case is
>> a future patch wishing to extend the coverage of the GTT domain to
>> include objects not bound into the GGTT but still in its coherent cache
>> domain. For the latter pair of requests, we need to operate on the
>> object regardless of its bind state.
>>
>> v2: After discussion with Akash, we came to the conclusion that the
>> get-pages was required for accurate domain tracking in the corner cases
>> (like the shrinker) and also useful for ensuring memory coherency with
>> earlier cached CPU mmaps in case userspace uses exotic cache bypass
>> (non-temporal) instructions.
>>
>> Cc: Akash Goel <akash.goel@xxxxxxxxx>
>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>>
>> Conflicts:
>>      drivers/gpu/drm/i915/i915_gem.c
>> ---
>>  drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++++------------------
>>  1 file changed, 21 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 124ec85..fe119e1 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1490,18 +1490,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>>      if (ret)
>>          goto unref;
>>
>> -    if (read_domains & I915_GEM_DOMAIN_GTT) {
>> +    if (read_domains & I915_GEM_DOMAIN_GTT)
>>          ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
>> -
>> -        /* Silently promote "you're not bound, there was nothing to do"
>> -         * to success, since the client was just asking us to
>> -         * make sure everything was done.
>> -         */
>> -        if (ret == -EINVAL)
>> -            ret = 0;
>> -    } else {
>> +    else
>>          ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
>> -    }
>>
>>  unref:
>>      drm_gem_object_unreference(&obj->base);
>> @@ -3722,15 +3714,10 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
>>  int
>>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>>  {
>> -    struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
>> -    struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>>      uint32_t old_write_domain, old_read_domains;
>> +    struct i915_vma *vma;
>>      int ret;
>>
>> -    /* Not valid to be called on unbound objects. */
>> -    if (vma == NULL)
>> -        return -EINVAL;
>> -
>>      if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
>>          return 0;
>>
>> @@ -3739,6 +3726,19 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>>          return ret;
>>
>>      i915_gem_object_retire(obj);
>> +
>> +    /* Flush and acquire obj->pages so that we are coherent through
>> +     * direct access in memory with previous cached writes through
>> +     * shmemfs and that our cache domain tracking remains valid.
>> +     * For example, if the obj->filp was moved to swap without us
>> +     * being notified and releasing the pages, we would mistakenly
>> +     * continue to assume that the obj remained out of the CPU cached
>> +     * domain.
>> +     */
>> +    ret = i915_gem_object_get_pages(obj);
>> +    if (ret)
>> +        return ret;
>> +
>>      i915_gem_object_flush_cpu_write_domain(obj, false);
>>
>>      /* Serialise direct access to this object with the barriers for
>> @@ -3770,9 +3770,12 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>>                          old_write_domain);
>>
>>      /* And bump the LRU for this access */
>> -    if (i915_gem_object_is_inactive(obj))
>> +    vma = i915_gem_obj_to_ggtt(obj);
>> +    if (vma &&
>> +        drm_mm_node_allocated(&vma->node) &&
>> +        i915_gem_object_is_inactive(obj))
>>          list_move_tail(&vma->mm_list,
>> -                   &dev_priv->gtt.base.inactive_list);
>> +                   &to_i915(obj->base.dev)->gtt.base.inactive_list);
>>
>>      return 0;
>>  }
>> --
>> 2.1.3
>>
>
>> From 4882abcb71b4982371a1aad038d7565887138ee5 Mon Sep 17 00:00:00 2001
>> From: Akash Goel <akash.goel@xxxxxxxxx>
>> Date: Thu, 23 Oct 2014 17:55:47 +0100
>> Subject: [PATCH 3/3] drm/i915: Support creation of unbound wc user mappings
>>  for objects
>>
>> This patch provides support for creating write-combining virtual mappings
>> of a GEM object. It provides the same functionality as the 'mmap_gtt'
>> interface without the constraints and contention of a limited aperture
>> space, but requires clients to handle the linear-to-tile conversion on
>> their own. This improves CPU write performance: with such a mapping,
>> writes and reads are almost 50% faster than with mmap_gtt. Like the GTT
>> mmapping, and unlike a regular CPU mmapping, it avoids the cache flush
>> after an update from the CPU side when the object is passed to the GPU.
>> This type of mapping is especially useful for sub-region updates,
>> i.e. when only a portion of the object is to be updated. Using a CPU mmap
>> in such cases would normally incur a clflush of the whole object, and
>> using a GTT mmapping would likely require eviction of an active object or
>> fence and thus stall. The write-combining CPU mmap avoids both.
>>
>> To ensure cache coherency, the GTT domain has been reused here: setting
>> it before using this mapping provides the required cache flush if the
>> object is in the CPU domain, as well as synchronization against
>> concurrent rendering. Although access through an uncached mmap should
>> automatically invalidate the cache lines, this may not be true for
>> non-temporal write instructions, and not all pages of the object may be
>> updated at any given point of time through this mapping. Having a call to
>> get_pages in the set_to_gtt_domain function, as added in the earlier patch
>> 'drm/i915: Broaden application of set-domain(GTT)', guarantees the clflush,
>> so there will be no cachelines holding stale data for the object before it
>> is accessed through this map.
>>
>> The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP ioctl) has been
>> extended with a new flags field (defaulting to 0 for existing users). In
>> order for userspace to detect the extended ioctl, a new parameter
>> I915_PARAM_MMAP_VERSION has been added for versioning the ioctl interface.
>>
>> v2: Fix error handling, invalid flag detection, renaming (ickle)
>> v3: Adapt to fit "drm/i915: Make the physical object coherent with GTT" (dileks)
>>
>> The new mmapping is exercised by igt/gem_mmap_wc,
>> igt/gem_concurrent_blit and igt/gem_gtt_speed.
>>
>> Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a
>> Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx>
>> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
>> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
>> ---
>>  drivers/gpu/drm/i915/i915_dma.c |  3 +++
>>  drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++++++++++++
>>  include/uapi/drm/i915_drm.h     |  9 +++++++++
>>  3 files changed, 31 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index 9b08853..c034cfc 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -1030,6 +1030,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>      case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>>          value = 1;
>>          break;
>> +    case I915_PARAM_MMAP_VERSION:
>> +        value = 1;
>> +        break;
>>      default:
>>          DRM_DEBUG("Unknown parameter %d\n", param->param);
>>          return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index fe119e1..48d0bbc 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1548,6 +1548,12 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>>      struct drm_gem_object *obj;
>>      unsigned long addr;
>>
>> +    if (args->flags & ~(I915_MMAP_WC))
>> +        return -EINVAL;
>> +
>> +    if (args->flags & I915_MMAP_WC && !cpu_has_pat)
>> +        return -ENODEV;
>> +
>>      obj = drm_gem_object_lookup(dev, file, args->handle);
>>      if (obj == NULL)
>>          return -ENOENT;
>> @@ -1563,6 +1569,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>>      addr = vm_mmap(obj->filp, 0, args->size,
>>                 PROT_READ | PROT_WRITE, MAP_SHARED,
>>                 args->offset);
>> +    if (args->flags & I915_MMAP_WC) {
>> +        struct mm_struct *mm = current->mm;
>> +        struct vm_area_struct *vma;
>> +
>> +        down_write(&mm->mmap_sem);
>> +        vma = find_vma(mm, addr);
>> +        if (vma)
>> +            vma->vm_page_prot =
>> +                pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
>> +        else
>> +            addr = -ENOMEM;
>> +        up_write(&mm->mmap_sem);
>> +    }
>>      drm_gem_object_unreference_unlocked(obj);
>>      if (IS_ERR((void *)addr))
>>          return addr;
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index c6b229f..f7eaefa 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -341,6 +341,7 @@ typedef struct drm_i915_irq_wait {
>>  #define I915_PARAM_HAS_WT                27
>>  #define I915_PARAM_CMD_PARSER_VERSION    28
>>  #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>> +#define I915_PARAM_MMAP_VERSION          30
>>
>>  typedef struct drm_i915_getparam {
>>      int param;
>> @@ -488,6 +489,14 @@ struct drm_i915_gem_mmap {
>>       * This is a fixed-size type for 32/64 compatibility.
>>       */
>>      __u64 addr_ptr;
>> +
>> +    /**
>> +     * Flags for extended behaviour.
>> +     *
>> +     * Added in version 2.
>> +     */
>> +    __u64 flags;
>> +#define I915_MMAP_WC 0x1
>>  };
>>
>>  struct drm_i915_gem_mmap_gtt {
>> --
>> 2.1.3
>>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
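[Editor's note: for readers who want to poke at the new interface from
userspace before the igt tests land, below is a minimal, untested sketch
against libdrm. It assumes an i915_drm.h refreshed with the quoted
patches; the fallback defines mirror the values from the patches, and the
helper names (has_wc_mmap, wc_mmap) are made up for illustration, not
part of any library.]

/* Detect the extended mmap ioctl via I915_PARAM_MMAP_VERSION and, if
 * available, map a GEM object write-combining with the new I915_MMAP_WC
 * flag from patch 3.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

#ifndef I915_PARAM_MMAP_VERSION
#define I915_PARAM_MMAP_VERSION 30      /* value from patch 3 */
#endif
#ifndef I915_MMAP_WC
#define I915_MMAP_WC 0x1                /* value from patch 3 */
#endif

static int has_wc_mmap(int fd)
{
        drm_i915_getparam_t gp;
        int value = 0;

        memset(&gp, 0, sizeof(gp));
        gp.param = I915_PARAM_MMAP_VERSION;
        gp.value = &value;

        /* Kernels without the patch reject the unknown parameter. */
        if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                return 0;

        return value >= 1;
}

static void *wc_mmap(int fd, uint32_t handle, uint64_t size)
{
        struct drm_i915_gem_mmap mmap_arg;
        struct drm_i915_gem_set_domain set_domain;

        /* Relies on the flags field added to drm_i915_gem_mmap by
         * patch 3; with an older installed header, the extended struct
         * must be copied in locally. */
        memset(&mmap_arg, 0, sizeof(mmap_arg));
        mmap_arg.handle = handle;
        mmap_arg.offset = 0;
        mmap_arg.size = size;
        mmap_arg.flags = I915_MMAP_WC;
        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
                return NULL;

        /* Per patch 2, set-domain(GTT) now also works on unbound
         * objects: it flushes stale CPU cachelines and serialises
         * against the GPU before we write through the WC mapping. */
        memset(&set_domain, 0, sizeof(set_domain));
        set_domain.handle = handle;
        set_domain.read_domains = I915_GEM_DOMAIN_GTT;
        set_domain.write_domain = I915_GEM_DOMAIN_GTT;
        drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);

        return (void *)(uintptr_t)mmap_arg.addr_ptr;
}

/* Usage: gate the fast path on has_wc_mmap(fd); the pointer returned by
 * wc_mmap() can then be written with normal stores (plus a store fence
 * after non-temporal writes), which is roughly what igt/gem_mmap_wc is
 * expected to exercise. */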