Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Quoting Mika Kuoppala (2019-07-09 13:24:27) >> Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: >> >> > We only use the dma pages for scratch, and so do not need to allocate >> > the extra storage for the shadow page directory. >> > >> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> >> > --- >> > drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++---------------- >> > drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +- >> > 2 files changed, 85 insertions(+), 113 deletions(-) >> > >> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> > index 236c964dd761..937236913e70 100644 >> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> > @@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm, >> > >> > #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) >> > >> > -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v)) >> > -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v)) >> > +#define fill_px(px, v) fill_page_dma(px_base(px), (v)) >> > +#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v)) >> > >> > -static void fill_page_dma(struct i915_address_space *vm, >> > - struct i915_page_dma *p, >> > - const u64 val) >> > +static void fill_page_dma(struct i915_page_dma *p, const u64 val) >> > { >> > - u64 * const vaddr = kmap_atomic(p->page); >> > - >> > - memset64(vaddr, val, PAGE_SIZE / sizeof(val)); >> > - >> > - kunmap_atomic(vaddr); >> > + kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES)); >> >> Neat. >> >> I would go for 512 instead of I915_PDES. There is no magic >> and there never will be magic as it is as const as carved into stone. > > I was just going with I915_PDES and I915_PDE_MASK throughout. Later this > one becomes count, fwiw. 
>> >> > } >> > >> > -static void fill_page_dma_32(struct i915_address_space *vm, >> > - struct i915_page_dma *p, >> > - const u32 v) >> > +static void fill_page_dma_32(struct i915_page_dma *p, const u32 v) >> > { >> > - fill_page_dma(vm, p, (u64)v << 32 | v); >> > + fill_page_dma(p, (u64)v << 32 | v); >> > } >> > >> > static int >> > @@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm) >> > __free_pages(p->page, order); >> > } >> > >> > +static void free_scratch(struct i915_address_space *vm) >> > +{ >> > + if (!vm->scratch_page.daddr) /* set to 0 on clones */ >> > + return; >> > + >> > + if (vm->scratch_pdp.daddr) >> > + cleanup_page_dma(vm, &vm->scratch_pdp); >> > + if (vm->scratch_pd.daddr) >> > + cleanup_page_dma(vm, &vm->scratch_pd); >> > + if (vm->scratch_pt.daddr) >> > + cleanup_page_dma(vm, &vm->scratch_pt); >> > + >> > + cleanup_scratch_page(vm); >> > +} >> > + >> > static struct i915_page_table *alloc_pt(struct i915_address_space *vm) >> > { >> > struct i915_page_table *pt; >> > @@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) >> > kfree(pt); >> > } >> > >> > -static void gen8_initialize_pt(struct i915_address_space *vm, >> > - struct i915_page_table *pt) >> > -{ >> > - fill_px(vm, pt, vm->scratch_pte); >> > -} >> > - >> > -static void gen6_initialize_pt(struct i915_address_space *vm, >> > - struct i915_page_table *pt) >> > -{ >> > - fill32_px(vm, pt, vm->scratch_pte); >> > -} >> > - >> > static struct i915_page_directory *__alloc_pd(void) >> > { >> > struct i915_page_directory *pd; >> > @@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm, >> > kfree(pd); >> > } >> > >> > -#define init_pd(vm, pd, to) { \ >> > - fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \ >> > - memset_p((pd)->entry, (to), 512); \ >> > +static void init_pd(struct i915_page_directory *pd, >> > + struct i915_page_dma *scratch) >> > +{ >> > + fill_px(pd, 
gen8_pde_encode(scratch->daddr, I915_CACHE_LLC)); >> > + memset_p(pd->entry, scratch, 512); >> > } >> > >> > static inline void >> > @@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, >> > u32 pde; >> > >> > gen8_for_each_pde(pt, pd, start, length, pde) { >> > - GEM_BUG_ON(pt == vm->scratch_pt); >> > + GEM_BUG_ON(px_base(pt) == &vm->scratch_pt); >> > >> > atomic_inc(&pt->used); >> > gen8_ppgtt_clear_pt(vm, pt, start, length); >> > - if (release_pd_entry(pd, pde, &pt->used, >> > - px_base(vm->scratch_pt))) >> > + if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt)) >> > free_pt(vm, pt); >> > } >> > } >> > @@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, >> > unsigned int pdpe; >> > >> > gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { >> > - GEM_BUG_ON(pd == vm->scratch_pd); >> > + GEM_BUG_ON(px_base(pd) == &vm->scratch_pd); >> >> Perhaps future will bring pd_points_scratch(pd). >> >> Now the intriguing, bordering irritating, question in my mind is >> that can we fold the scratch_pd and scratch_pdp to be the same thing. > No, we can fold the scratch_pd to be the same (dma wise) as they do need > to end up at the scratch_pte. And sadly we can't use the scratch_pte as > the filler for scratch_pd. Oh indeed, it needs to be a hierarchy even on the upper level to break out. *blush* > >> Patch lgtm with some dislike towards I915_PDES, > I'm not keen on it tbh. But the mix of alternating between 512/0x1ff > does suggest to use some name. 512 everywhere. -Mika _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx