On an Ivybridge i7-3720qm with 1600MHz DDR3, with 32 fences, Upload rate for 2 linear surfaces: 8134MiB/s -> 8154MiB/s Upload rate for 2 tiled surfaces: 8625MiB/s -> 8632MiB/s Upload rate for 4 linear surfaces: 8127MiB/s -> 8134MiB/s Upload rate for 4 tiled surfaces: 8602MiB/s -> 8629MiB/s Upload rate for 8 linear surfaces: 8124MiB/s -> 8137MiB/s Upload rate for 8 tiled surfaces: 8603MiB/s -> 8624MiB/s Upload rate for 16 linear surfaces: 8123MiB/s -> 8128MiB/s Upload rate for 16 tiled surfaces: 8606MiB/s -> 8618MiB/s Upload rate for 32 linear surfaces: 8121MiB/s -> 8128MiB/s Upload rate for 32 tiled surfaces: 8605MiB/s -> 8614MiB/s Upload rate for 64 linear surfaces: 8121MiB/s -> 8127MiB/s Upload rate for 64 tiled surfaces: 3017MiB/s -> 5202MiB/s Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Testcase: igt/gem_fence_upload/performance Testcase: igt/gem_mmap_gtt --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/i915/Makefile | 5 +- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem.c | 46 +++----------- drivers/gpu/drm/i915/i915_memory.c | 122 +++++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 41 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_memory.c diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f858aa25fbb2..6834d0e33741 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ -obj-$(CONFIG_DRM_I915) += i915/ +obj-y += i915/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ obj-$(CONFIG_DRM_VC4) += vc4/ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 79d657f29241..a362425ef862 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -100,6 +100,9 @@ i915-y += i915_vgpu.o # legacy horrors i915-y += i915_dma.o 
-obj-$(CONFIG_DRM_I915) += i915.o +obj-$(CONFIG_DRM_I915) += i915.o +ifdef CONFIG_DRM_I915 +obj-y += i915_memory.o +endif CFLAGS_i915_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45b8cbdfab55..e6f49175af1b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3447,4 +3447,8 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) return false; } +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap); + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7e321fdd90d2..1fa4752682d6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1449,7 +1449,6 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_i915_private *dev_priv = dev->dev_private; struct i915_vma *ggtt; pgoff_t page_offset; - unsigned long pfn; int ret = 0; bool write = !!(vmf->flags & FAULT_FLAG_WRITE); @@ -1517,44 +1516,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto unpin; /* Finally, remap it using the new GTT offset */ - pfn = dev_priv->gtt.mappable_base + ggtt->node.start; - pfn >>= PAGE_SHIFT; - - if (ggtt->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { - if (!obj->fault_mappable) { - unsigned long size = min_t(unsigned long, - vma->vm_end - vma->vm_start, - obj->base.size); - int i; - - for (i = 0; i < size >> PAGE_SHIFT; i++) { - ret = vm_insert_pfn(vma, - (unsigned long)vma->vm_start + i * PAGE_SIZE, - pfn + i); - if (ret) - break; - } - } else - ret = vm_insert_pfn(vma, - (unsigned long)vmf->virtual_address, - pfn + page_offset); - } else { - /* Overriding existing pages in partial view does not cause - * us any trouble as TLBs are still valid because the fault - * is due to userspace losing part of the mapping or never - * having accessed it before (at this 
partials' range). - */ - const struct i915_ggtt_view *view = &ggtt->ggtt_view; - unsigned long base = vma->vm_start + - (view->params.partial.offset << PAGE_SHIFT); - unsigned int i; - - for (i = 0; i < view->params.partial.size; i++) { - ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); - if (ret) - break; - } - } + ret = remap_io_mapping(vma, + vma->vm_start + (ggtt->ggtt_view.params.partial.offset << PAGE_SHIFT), + (dev_priv->gtt.mappable_base + ggtt->node.start) >> PAGE_SHIFT, + min_t(u64, ggtt->size, vma->vm_end - vma->vm_start), + &dev_priv->gtt.mappable); + if (ret) + goto unpin; obj->fault_mappable = true; unpin: diff --git a/drivers/gpu/drm/i915/i915_memory.c b/drivers/gpu/drm/i915/i915_memory.c new file mode 100644 index 000000000000..f684576022f3 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_memory.c @@ -0,0 +1,122 @@ +#include <linux/mm.h> +#include <linux/io-mapping.h> + +#include <asm/io.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgtable.h> + +#include "i915_drv.h" + +struct remap_pfn { + struct mm_struct *mm; + unsigned long addr; + unsigned long pfn; + pgprot_t prot; +}; + +static inline void remap_pfn(struct remap_pfn *r, pte_t *pte) +{ + set_pte_at(r->mm, r->addr, pte, + pte_mkspecial(pfn_pte(r->pfn, r->prot))); + r->pfn++; + r->addr += PAGE_SIZE; +} + +static inline int remap_pte_range(struct remap_pfn *r, pmd_t *pmd, unsigned long end) +{ + pte_t *pte; + spinlock_t *ptl; + + pte = pte_alloc_map_lock(r->mm, pmd, r->addr, &ptl); + if (!pte) + return -ENOMEM; + + arch_enter_lazy_mmu_mode(); + do + remap_pfn(r, pte++); + while (r->addr < end); + arch_leave_lazy_mmu_mode(); + + pte_unmap_unlock(pte - 1, ptl); + return 0; +} + +static inline int remap_pmd_range(struct remap_pfn *r, pud_t *pud, unsigned long end) +{ + pmd_t *pmd; + int err; + + pmd = pmd_alloc(r->mm, pud, r->addr); + if (!pmd) + return -ENOMEM; + VM_BUG_ON(pmd_trans_huge(*pmd)); + + do + err = 
remap_pte_range(r, pmd++, pmd_addr_end(r->addr, end)); + while (err == 0 && r->addr < end); + + return err; +} + +static inline int remap_pud_range(struct remap_pfn *r, pgd_t *pgd, unsigned long end) +{ + pud_t *pud; + int err; + + pud = pud_alloc(r->mm, pgd, r->addr); + if (!pud) + return -ENOMEM; + + do + err = remap_pmd_range(r, pud++, pud_addr_end(r->addr, end)); + while (err == 0 && r->addr < end); + + return err; +} + +/** + * remap_io_mapping - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: page frame number of the first page of IO memory to map + * @size: size of map area in bytes (rounded up to a whole page) + * @iomap: the source io_mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap) +{ + unsigned long end = addr + PAGE_ALIGN(size); + struct remap_pfn r; + pgd_t *pgd; + int err; + + if (WARN_ON(addr >= end)) + return -EINVAL; + +#define MUST_SET (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP) + BUG_ON((vma->vm_flags & MUST_SET) != MUST_SET); +#undef MUST_SET + + r.mm = vma->vm_mm; + r.addr = addr; + r.pfn = pfn; + r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | + (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); + + pgd = pgd_offset(r.mm, addr); + do + err = remap_pud_range(&r, pgd++, pgd_addr_end(r.addr, end)); + while (err == 0 && r.addr < end); + + if (err) + zap_vma_ptes(vma, addr, r.addr - addr); + + return err; +} +EXPORT_SYMBOL_GPL(remap_io_mapping); -- 2.7.0.rc3 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx