If we set up backing phys pages for 3lvl pdps, even when they are not
used, we lose 5 pages per ppgtt. Trading this memory on bsw, we gain
more common code paths for all gen8+ directory manipulation, and those
paths are now free of checks for page directory type, making the hot
paths faster.

Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 112 +++++++++++++++++-----------
 1 file changed, 68 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ba2802c25d13..c76c92072d54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -714,22 +714,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 	return pd;
 }
 
-static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
-{
-	return pd->base.page;
-}
-
 static void free_pd(struct i915_address_space *vm,
 		    struct i915_page_directory *pd)
 {
-	if (likely(pd_has_phys_page(pd)))
-		cleanup_page_dma(vm, &pd->base);
-
+	cleanup_page_dma(vm, &pd->base);
 	kfree(pd);
 }
 
 #define init_pd(vm, pd, to) {					\
-	GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd));		\
 	fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
 	memset_p((pd)->entry, (to), 512);			\
 }
@@ -747,8 +739,7 @@ static void __set_pd_entry(struct i915_page_directory * const pd,
 
 #define set_pd_entry(pd, pde, to) ({				\
 	(pd)->entry[(pde)] = (to);				\
-	if (likely(pd_has_phys_page(pd)))			\
-		__set_pd_entry((pd), (pde),			\
+	__set_pd_entry((pd), (pde),				\
 		       gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
 })
 
@@ -764,8 +755,7 @@ __swap_pd_entry(struct i915_page_directory * const pd,
 
 	if (likely(old == old_val)) {
 		atomic_inc(&pd->used);
-		if (likely(pd_has_phys_page(pd)))
-			__set_pd_entry(pd, pde, encode(daddr, I915_CACHE_LLC));
+		__set_pd_entry(pd, pde, encode(daddr, I915_CACHE_LLC));
 	}
 
 	return old;
@@ -1539,6 +1529,50 @@ static void ppgtt_init(struct drm_i915_private *i915,
 	ppgtt->vm.vma_ops.clear_pages = clear_pages;
 }
 
+static void init_pd_n(struct i915_address_space *vm,
+		      struct i915_page_directory *pd,
+		      struct i915_page_directory *to,
+		      const unsigned int entries)
+{
+	const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC);
+	u64 * const vaddr = kmap_atomic(pd->base.page);
+
+	memset64(vaddr, daddr, entries);
+	kunmap_atomic(vaddr);
+
+	memset_p(pd->entry, to, entries);
+}
+
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
+{
+	struct i915_page_directory *pd;
+
+	if (i915_vm_is_4lvl(vm)) {
+		pd = alloc_pd(vm);
+		if (!IS_ERR(pd))
+			init_pd(vm, pd, vm->scratch_pdp);
+
+		return pd;
+	}
+
+	/* 3lvl */
+	pd = __alloc_pd();
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	pd->entry[GEN8_3LVL_PDPES] = NULL;
+
+	if (unlikely(setup_page_dma(vm, &pd->base))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES);
+
+	return pd;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1548,6 +1582,7 @@ static void ppgtt_init(struct drm_i915_private *i915,
  */
 static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 {
+	struct i915_address_space *vm;
 	struct i915_ppgtt *ppgtt;
 	int err;
 
@@ -1557,70 +1592,59 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt_init(i915, ppgtt);
 
+	vm = &ppgtt->vm;
+
 	/*
 	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 */
-	ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;
+	vm->has_read_only = INTEL_GEN(i915) != 11;
 
 	/* There are only few exceptions for gen >=6. chv and bxt.
 	 * And we are not sure about the latter so play safe for now.
 	 */
 	if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
-		ppgtt->vm.pt_kmap_wc = true;
+		vm->pt_kmap_wc = true;
 
-	err = gen8_init_scratch(&ppgtt->vm);
+	err = gen8_init_scratch(vm);
 	if (err)
 		goto err_free;
 
-	ppgtt->pd = __alloc_pd();
-	if (!ppgtt->pd) {
-		err = -ENOMEM;
+	ppgtt->pd = gen8_alloc_top_pd(vm);
+	if (IS_ERR(ppgtt->pd)) {
+		err = PTR_ERR(ppgtt->pd);
 		goto err_free_scratch;
 	}
 
-	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		err = setup_page_dma(&ppgtt->vm, &ppgtt->pd->base);
-		if (err)
-			goto err_free_pdp;
-
-		init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp);
-
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
-		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
+	if (i915_vm_is_4lvl(vm)) {
+		vm->allocate_va_range = gen8_ppgtt_alloc_4lvl;
+		vm->insert_entries = gen8_ppgtt_insert_4lvl;
+		vm->clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
-		/*
-		 * We don't need to setup dma for top level pdp, only
-		 * for entries. So point entries to scratch.
-		 */
-		memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd,
-			 GEN8_3LVL_PDPES);
-
 		if (intel_vgpu_active(i915)) {
 			err = gen8_preallocate_top_level_pdp(ppgtt);
 			if (err)
-				goto err_free_pdp;
+				goto err_free_pd;
 		}
 
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
-		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
+		vm->allocate_va_range = gen8_ppgtt_alloc_3lvl;
+		vm->insert_entries = gen8_ppgtt_insert_3lvl;
+		vm->clear_range = gen8_ppgtt_clear_3lvl;
 	}
 
 	if (intel_vgpu_active(i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
-	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+	vm->cleanup = gen8_ppgtt_cleanup;
 
 	return ppgtt;
 
-err_free_pdp:
-	free_pd(&ppgtt->vm, ppgtt->pd);
+err_free_pd:
+	free_pd(vm, ppgtt->pd);
 err_free_scratch:
-	gen8_free_scratch(&ppgtt->vm);
+	gen8_free_scratch(vm);
 err_free:
 	kfree(ppgtt);
 	return ERR_PTR(err);
-- 
2.17.1
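
The tradeoff in the commit message can be made concrete with a minimal
standalone sketch. This is plain userspace C, not i915 code, and every
name in it (toy_pd, toy_alloc_pd, toy_set_entry) is hypothetical; it only
illustrates the idea that once every directory level unconditionally owns
a backing page, the entry-update helper collapses to a single branch-free
path, which is what removing pd_has_phys_page() buys.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_ENTRIES 512

struct toy_pd {
	uint64_t *vaddr;			/* CPU view of the backing page */
	struct toy_pd *entry[TOY_ENTRIES];	/* shadow pointers */
};

static struct toy_pd *toy_alloc_pd(void)
{
	struct toy_pd *pd = calloc(1, sizeof(*pd));

	if (!pd)
		return NULL;

	/* Unconditional backing storage: this is the traded memory. */
	pd->vaddr = calloc(TOY_ENTRIES, sizeof(uint64_t));
	if (!pd->vaddr) {
		free(pd);
		return NULL;
	}

	return pd;
}

/* One common path for every level: no "has phys page?" check needed. */
static void toy_set_entry(struct toy_pd *pd, unsigned int idx,
			  struct toy_pd *to, uint64_t encoded)
{
	pd->entry[idx] = to;
	pd->vaddr[idx] = encoded;
}

int main(void)
{
	struct toy_pd *top = toy_alloc_pd();
	struct toy_pd *child = toy_alloc_pd();

	if (!top || !child)
		return 1;

	toy_set_entry(top, 0, child, 0xdeadbeefull);
	printf("entry 0 -> %#llx\n", (unsigned long long)top->vaddr[0]);

	free(child->vaddr);
	free(child);
	free(top->vaddr);
	free(top);
	return 0;
}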