[PATCH 2/2] drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Very old numbers indicate this is a 66% improvement when remapping the
entire object for fence contention - due to the elimination of
track_pfn_insert and its strcmp).

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Testcase: igt/gem_fence_upload/performance
Testcase: igt/gem_mmap_gtt
---
 drivers/gpu/drm/Makefile        |  2 +-
 drivers/gpu/drm/i915/Makefile   |  3 +-
 drivers/gpu/drm/i915/i915_drv.h |  5 +++
 drivers/gpu/drm/i915/i915_gem.c | 50 ++++--------------------
 drivers/gpu/drm/i915/i915_mm.c  | 85 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 100 insertions(+), 45 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_mm.c

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 0238bf8bc8c3..3ff094171ee5 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -46,7 +46,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_MGA)	+= mga/
 obj-$(CONFIG_DRM_I810)	+= i810/
-obj-$(CONFIG_DRM_I915)  += i915/
+obj-$(CONFIG_DRM_I915)	+= i915/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
 obj-$(CONFIG_DRM_VC4)  += vc4/
 obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3412413408c0..a7da24640e88 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -12,6 +12,7 @@ subdir-ccflags-y += \
 i915-y := i915_drv.o \
 	  i915_irq.o \
 	  i915_memcpy.o \
+	  i915_mm.o \
 	  i915_params.o \
 	  i915_pci.o \
           i915_suspend.o \
@@ -113,6 +114,6 @@ i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
 endif
 
-obj-$(CONFIG_DRM_I915)  += i915.o
+obj-$(CONFIG_DRM_I915) += i915.o
 
 CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 05efc0501f3c..0f25302fb517 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3936,6 +3936,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 
+/* i915_mm.c */
+int remap_io_mapping(struct vm_area_struct *vma,
+		     unsigned long addr, unsigned long pfn, unsigned long size,
+		     struct io_mapping *iomap);
+
 #define ptr_unpack_bits(ptr, bits) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(bits) = __v & ~PAGE_MASK;					\
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f12114a35ae3..584144d5d8ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1698,7 +1698,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 	struct i915_vma *vma;
 	pgoff_t page_offset;
-	unsigned long pfn;
 	unsigned int flags;
 	int ret;
 
@@ -1768,48 +1767,13 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 		goto err_unpin;
 
 	/* Finally, remap it using the new GTT offset */
-	pfn = ggtt->mappable_base + i915_ggtt_offset(vma);
-	pfn >>= PAGE_SHIFT;
-
-	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
-		if (!obj->fault_mappable) {
-			unsigned long size =
-				min_t(unsigned long,
-				      area->vm_end - area->vm_start,
-				      obj->base.size) >> PAGE_SHIFT;
-			unsigned long base = area->vm_start;
-			int i;
-
-			for (i = 0; i < size; i++) {
-				ret = vm_insert_pfn(area,
-						    base + i * PAGE_SIZE,
-						    pfn + i);
-				if (ret)
-					break;
-			}
-		} else
-			ret = vm_insert_pfn(area,
-					    (unsigned long)vmf->virtual_address,
-					    pfn + page_offset);
-	} else {
-		/* Overriding existing pages in partial view does not cause
-		 * us any trouble as TLBs are still valid because the fault
-		 * is due to userspace losing part of the mapping or never
-		 * having accessed it before (at this partials' range).
-		 */
-		const struct i915_ggtt_view *view = &vma->ggtt_view;
-		unsigned long base = area->vm_start +
-			(view->params.partial.offset << PAGE_SHIFT);
-		unsigned int i;
-
-		for (i = 0; i < view->params.partial.size; i++) {
-			ret = vm_insert_pfn(area,
-					    base + i * PAGE_SIZE,
-					    pfn + i);
-			if (ret)
-				break;
-		}
-	}
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
+			       (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->mappable);
+	if (ret)
+		goto err_unpin;
 
 	obj->fault_mappable = true;
 err_unpin:
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
new file mode 100644
index 000000000000..b5f531dd57be
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/io-mapping.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgtable.h>
+
+#include "i915_drv.h"
+
+struct remap_pfn {
+	struct mm_struct *mm;
+	unsigned long pfn;
+	pgprot_t prot;
+};
+
+static int remap_pfn(pte_t *pte, pgtable_t token,
+		     unsigned long addr, void *data)
+{
+	struct remap_pfn *r = data;
+
+	set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot)));
+	r->pfn++;
+
+	return 0;
+}
+
+/**
+ * remap_io_mapping - remap an IO mapping to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @iomap: the source io_mapping
+ *
+ *  Note: this is only safe if the mm semaphore is held when called.
+ */
+int remap_io_mapping(struct vm_area_struct *vma,
+		     unsigned long addr, unsigned long pfn, unsigned long size,
+		     struct io_mapping *iomap)
+{
+	struct remap_pfn r;
+	int err;
+
+#define MUST_SET (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+	if (WARN_ON((vma->vm_flags & MUST_SET) != MUST_SET))
+		return -EINVAL;
+#undef MUST_SET
+
+	r.mm = vma->vm_mm;
+	r.pfn = pfn;
+	r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
+			  (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
+
+	err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
+	if (unlikely(err)) {
+		zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
+		return err;
+	}
+
+	return 0;
+}
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux