It does what the title says by switching over to a single map function instead of a VA allocation followed by PTE writes. It still keeps the insert_entries crutch to keep the risk as low as possible. That crutch will be gone soon. To me, this has always been the most sensible way to treat VMAs, and it's not the first time I've written this patch. XXX: This patch was never tested pre-GEN8. After rebase it was compile-tested only on GEN8. Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 78 +++++++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d73f5f5..3b3f844 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -609,6 +609,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, static void gen8_ppgtt_insert_pte_entries(struct i915_pagedirpo *pdp, struct sg_page_iter *sg_iter, uint64_t start, + size_t pages, enum i915_cache_level cache_level, const bool flush) { @@ -619,7 +620,7 @@ static void gen8_ppgtt_insert_pte_entries(struct i915_pagedirpo *pdp, pt_vaddr = NULL; - while (__sg_page_iter_next(sg_iter)) { + while (pages-- && __sg_page_iter_next(sg_iter)) { if (pt_vaddr == NULL) { struct i915_pagedir *pd = pdp->pagedirs[pdpe]; struct i915_pagetab *pt = pd->page_tables[pde]; @@ -660,7 +661,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_page_iter sg_iter; __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); - gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start, cache_level, !HAS_LLC(vm->dev)); + gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start, + sg_nents(pages->sgl), + cache_level, !HAS_LLC(vm->dev)); } static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde, @@ -907,6 +910,11 @@ static void gen8_teardown_va_range(struct i915_address_space *vm, 
__gen8_teardown_va_range(vm, start, length, false); } +static void gen8_unmap_vma(struct i915_vma *vma) +{ + gen8_teardown_va_range(vma->vm, vma->node.start, vma->node.size); +} + static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) { trace_i915_va_teardown(&ppgtt->base, @@ -1177,6 +1185,7 @@ static int __gen8_alloc_vma_range_3lvl(struct i915_address_space *vm, if (sg_iter) { BUG_ON(!sg_iter->__nents); gen8_ppgtt_insert_pte_entries(pdp, sg_iter, pd_start, + gen8_pte_count(pd_start, pd_len), flags, !HAS_LLC(vm->dev)); } set_bit(pde, pd->used_pdes); @@ -1301,6 +1310,28 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, start, length, 0); } +static int gen8_map_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) + +{ + struct i915_address_space *vm = vma->vm; + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + struct sg_page_iter sg_iter; + + __sg_page_iter_start(&sg_iter, vma->obj->pages->sgl, + sg_nents(vma->obj->pages->sgl), 0); + if (HAS_48B_PPGTT(vm->dev)) + return __gen8_alloc_vma_range_4lvl(vm, &ppgtt->pml4, &sg_iter, + vma->node.start, + vma->node.size, 0); + else + return __gen8_alloc_vma_range_3lvl(vm, &ppgtt->pdp, &sg_iter, + vma->node.start, + vma->node.size, 0); +} + static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt) { free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev); @@ -1326,7 +1357,6 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->base.start = 0; ppgtt->base.total = size; ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->enable = gen8_ppgtt_enable; if (HAS_48B_PPGTT(ppgtt->base.dev)) { @@ -1380,6 +1410,9 @@ static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.allocate_va_range = NULL; ppgtt->base.teardown_va_range = NULL; + ppgtt->base.map_vma = NULL; + ppgtt->base.unmap_vma = NULL; + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 
ppgtt->base.clear_range = gen8_ppgtt_clear_range; return 0; @@ -1395,9 +1428,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.teardown_va_range = gen8_teardown_va_range; - ppgtt->base.clear_range = NULL; + ppgtt->base.allocate_va_range = NULL; + ppgtt->base.teardown_va_range = NULL; + ppgtt->base.map_vma = gen8_map_vma; + ppgtt->base.unmap_vma = gen8_unmap_vma; + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; + ppgtt->base.clear_range = gen8_ppgtt_clear_range; return 0; } @@ -1857,6 +1893,13 @@ unwind_out: return ret; } +static int gen6_map_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + return gen6_alloc_va_range(vma->vm, vma->node.start, vma->node.size); +} + static void gen6_teardown_va_range(struct i915_address_space *vm, uint64_t start, uint64_t length) { @@ -1890,6 +1933,11 @@ static void gen6_teardown_va_range(struct i915_address_space *vm, } } +static void gen6_unmap_vma(struct i915_vma *vma) +{ + gen6_teardown_va_range(vma->vm, vma->node.start, vma->node.size); +} + static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) { struct i915_pagetab *pt; @@ -2019,8 +2067,13 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; - ppgtt->base.teardown_va_range = gen6_teardown_va_range; + if (aliasing) { + ppgtt->base.allocate_va_range = gen6_alloc_va_range; + ppgtt->base.teardown_va_range = gen6_teardown_va_range; + } else { + ppgtt->base.map_vma = gen6_map_vma; + ppgtt->base.unmap_vma = gen6_unmap_vma; + } ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; @@ -2095,6 +2148,15 @@ ppgtt_bind_vma(struct i915_vma *vma, if (vma->obj->gt_ro) flags |= PTE_READ_ONLY; + if (vma->vm->map_vma) { + trace_i915_va_alloc(vma->vm, vma->node.start, 
vma->node.size, + VM_TO_TRACE_NAME(vma->vm)); + ret = vma->vm->map_vma(vma, cache_level, flags); + if (!ret) + ppgtt_invalidate_tlbs(vma->vm); + return ret; + } + if (vma->vm->allocate_va_range) { trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size, VM_TO_TRACE_NAME(vma->vm)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f4c611e..d2cd9cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -295,6 +295,8 @@ struct i915_address_space { struct sg_table *st, uint64_t start, enum i915_cache_level cache_level, u32 flags); + int (*map_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + void (*unmap_vma)(struct i915_vma *vma); void (*cleanup)(struct i915_address_space *vm); }; -- 2.0.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx