Very old numbers indicate this is a 66% improvement when remapping the entire object for fence contention - due to the elimination of track_pfn_insert and its strcmp). Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Testcase: igt/gem_fence_upload/performance Testcase: igt/gem_mmap_gtt --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/i915_gem.c | 50 ++++-------------------- drivers/gpu/drm/i915/i915_mm.c | 85 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 100 insertions(+), 45 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_mm.c diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 0238bf8bc8c3..3ff094171ee5 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -46,7 +46,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ -obj-$(CONFIG_DRM_I915) += i915/ +obj-$(CONFIG_DRM_I915) += i915/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ obj-$(CONFIG_DRM_VC4) += vc4/ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3412413408c0..a7da24640e88 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -12,6 +12,7 @@ subdir-ccflags-y += \ i915-y := i915_drv.o \ i915_irq.o \ i915_memcpy.o \ + i915_mm.o \ i915_params.o \ i915_pci.o \ i915_suspend.o \ @@ -113,6 +114,6 @@ i915-y += intel_gvt.o include $(src)/gvt/Makefile endif -obj-$(CONFIG_DRM_I915) += i915.o +obj-$(CONFIG_DRM_I915) += i915.o CFLAGS_i915_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 05efc0501f3c..0f25302fb517 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3936,6 +3936,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) void i915_memcpy_init_early(struct drm_i915_private *dev_priv); bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); +/* i915_mm.c */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap); + #define ptr_unpack_bits(ptr, bits) ({ \ unsigned long __v = (unsigned long)(ptr); \ (bits) = __v & ~PAGE_MASK; \ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f12114a35ae3..584144d5d8ea 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1698,7 +1698,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) bool write = !!(vmf->flags & FAULT_FLAG_WRITE); struct i915_vma *vma; pgoff_t page_offset; - unsigned long pfn; unsigned int flags; int ret; @@ -1768,48 +1767,13 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) goto err_unpin; /* Finally, remap it using the new GTT offset */ - pfn = ggtt->mappable_base + i915_ggtt_offset(vma); - pfn >>= PAGE_SHIFT; - - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { - if (!obj->fault_mappable) { - unsigned long size = - min_t(unsigned long, - area->vm_end - area->vm_start, - obj->base.size) >> PAGE_SHIFT; - unsigned long base = area->vm_start; - int i; - - for (i = 0; i < size; i++) { - ret = vm_insert_pfn(area, - base + i * PAGE_SIZE, - pfn + i); - if (ret) - break; - } - } else - ret = vm_insert_pfn(area, - (unsigned long)vmf->virtual_address, - pfn + page_offset); - } else { - /* Overriding existing pages in partial view does not cause - * us any trouble as TLBs are still valid because the fault - * is due to userspace losing part of the mapping or never - * having accessed it before (at this partials' range). - */ - const struct i915_ggtt_view *view = &vma->ggtt_view; - unsigned long base = area->vm_start + - (view->params.partial.offset << PAGE_SHIFT); - unsigned int i; - - for (i = 0; i < view->params.partial.size; i++) { - ret = vm_insert_pfn(area, - base + i * PAGE_SIZE, - pfn + i); - if (ret) - break; - } - } + ret = remap_io_mapping(area, + area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT), + (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + min_t(u64, vma->size, area->vm_end - area->vm_start), + &ggtt->mappable); + if (ret) + goto err_unpin; obj->fault_mappable = true; err_unpin: diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c new file mode 100644 index 000000000000..b5f531dd57be --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/mm.h> +#include <linux/io-mapping.h> +#include <linux/io.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> + +#include "i915_drv.h" + +struct remap_pfn { + struct mm_struct *mm; + unsigned long pfn; + pgprot_t prot; +}; + +static int remap_pfn(pte_t *pte, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); + r->pfn++; + + return 0; +} + +/** + * remap_io_mapping - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @iomap: the source io_mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap) +{ + struct remap_pfn r; + int err; + +#define MUST_SET (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + if (WARN_ON((vma->vm_flags & MUST_SET) != MUST_SET)) + return -EINVAL; +#undef MUST_SET + + r.mm = vma->vm_mm; + r.pfn = pfn; + r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | + (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); + + err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT); + return err; + } + + return 0; +} -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx