Quoting Mika Kuoppala (2019-06-18 17:17:31)
> If we setup a backing phys page for 3lvl pdps, even though they
> are not used, we lose 5 pages per ppgtt.
>
> Trading this memory on bsw, we gain more common code paths for all
> gen8+ directory manipulation. And those paths are now void of checks
> for page directory type, making the hot paths faster.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 106 +++++++++++++++++-----------
>  1 file changed, 66 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b521b1ddd19b..ea78302c6348 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -715,22 +715,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>          return pd;
>  }
>
> -static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
> -{
> -        return pd->base.page;
> -}
> -
>  static void free_pd(struct i915_address_space *vm,
>                      struct i915_page_directory *pd)
>  {
> -        if (likely(pd_has_phys_page(pd)))
> -                cleanup_page_dma(vm, &pd->base);
> -
> +        cleanup_page_dma(vm, &pd->base);
>          kfree(pd);
>  }
>
>  #define init_pd(vm, pd, to) {                                           \
> -        GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd));                        \
>          fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
>          memset_p((pd)->entry, (to), 512);                               \
>  }
> @@ -1539,6 +1531,50 @@ static void ppgtt_init(struct drm_i915_private *i915,
>          ppgtt->vm.vma_ops.clear_pages = clear_pages;
>  }
>
> +static void init_pd_n(struct i915_address_space *vm,
> +                      struct i915_page_directory *pd,
> +                      struct i915_page_directory *to,
> +                      const unsigned int entries)
> +{
> +        const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC);
> +        u64 * const vaddr = kmap_atomic(pd->base.page);
> +
> +        memset64(vaddr, daddr, entries);
> +        kunmap_atomic(vaddr);
> +
> +        memset_p(pd->entry, to, entries);
> +}
> +
> +static struct i915_page_directory *
> +gen8_alloc_top_pd(struct i915_address_space *vm)
> +{
> +        struct i915_page_directory *pd;
> +
> +        if (i915_vm_is_4lvl(vm)) {
> +                pd = alloc_pd(vm);
> +                if (!IS_ERR(pd))
> +                        init_pd(vm, pd, vm->scratch_pdp);
> +
> +                return pd;
> +        }
> +
> +        /* 3lvl */
> +        pd = __alloc_pd();
> +        if (!pd)
> +                return ERR_PTR(-ENOMEM);
> +
> +        pd->entry[GEN8_3LVL_PDPES] = NULL;
> +
> +        if (unlikely(setup_page_dma(vm, &pd->base))) {
> +                kfree(pd);
> +                return ERR_PTR(-ENOMEM);
> +        }
> +
> +        init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES);
> +
> +        return pd;
> +}
> +
>  /*
>   * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
>   * with a net effect resembling a 2-level page table in normal x86 terms. Each
> @@ -1548,6 +1584,7 @@ static void ppgtt_init(struct drm_i915_private *i915,
>   */
>  static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  {
> +        struct i915_address_space *vm;
>          struct i915_ppgtt *ppgtt;
>          int err;
>
> @@ -1557,70 +1594,59 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>
>          ppgtt_init(i915, ppgtt);
>
> +        vm = &ppgtt->vm;

Been having this debate with Tursulin, whether or not it is more
confusing to have a local alias here. I think on reading it, it is much
clearer that we are setting up one object if we use ppgtt->vm.foo than
it is to alternate between ppgtt->foo and vm->bar. I'd suggest leaving
it as ppgtt->vm.foo in this patch.
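
To illustrate the readability argument, here is a toy sketch of the
two styles side by side (the struct definitions below are stand-ins
invented for the example, not the real i915 ones):

        /* minimal stand-in types so both styles compile in isolation */
        struct i915_address_space { unsigned long long total; };
        struct i915_ppgtt { struct i915_address_space vm; void *pd; };

        /* with the local alias: the reader juggles vm->... and ppgtt->... */
        static void setup_with_alias(struct i915_ppgtt *ppgtt)
        {
                struct i915_address_space *vm = &ppgtt->vm;

                vm->total = 1ULL << 32;         /* hypothetical value */
                ppgtt->pd = NULL;
        }

        /* spelled out: everything visibly hangs off the one ppgtt object */
        static void setup_spelled_out(struct i915_ppgtt *ppgtt)
        {
                ppgtt->vm.total = 1ULL << 32;   /* hypothetical value */
                ppgtt->pd = NULL;
        }

Both compile to the same thing; the second keeps a single handle in
view while the one object is being constructed.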
-Chris