Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> We can simplify our gtt walking code by comparing against NULL for
> scratch entries as opposed to looking up the distinct per-level scratch
> pointer.
>
> The only caveat is to remember to protect external parties and map the
> NULL to the scratch top pd.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 124 +++++++++-------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
>  2 files changed, 41 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b7882f06214a..a99b89502a90 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -596,18 +596,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
>
>  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
>
> -#define fill_px(px, v) fill_page_dma(px_base(px), (v))
> -#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
> -
> -static void fill_page_dma(struct i915_page_dma *p, const u64 val)
> +static void
> +fill_page_dma(struct i915_page_dma *p, const u64 val, unsigned int count)
>  {
> -	kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
> +	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
>  }
>
> -static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
> -{
> -	fill_page_dma(p, (u64)v << 32 | v);
> -}
> +#define fill_px(px, v) fill_page_dma(px_base(px), (v), I915_PDES)
> +#define fill32_px(px, v) do { \
> +	u64 vv = lower_32_bits(v); \
> +	fill_px(px, vv << 32 | vv); \
> +} while (0)
>
>  static int
>  setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
> @@ -711,7 +710,6 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  	}
>
>  	atomic_set(&pt->used, 0);
> -
>  	return pt;
>  }
>
> @@ -719,13 +717,11 @@ static struct i915_page_directory *__alloc_pd(void)
>  {
>  	struct i915_page_directory *pd;
>
> -	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
> +	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
>  	if (unlikely(!pd))
>  		return NULL;
>
> -	atomic_set(px_used(pd), 0);
>  	spin_lock_init(&pd->lock);
> -
>  	return pd;
>  }
>
> @@ -753,63 +749,56 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
>
>  #define free_px(vm, px) free_pd(vm, px_base(px))
>
> -static void init_pd(struct i915_page_directory *pd,
> -		    struct i915_page_scratch *scratch)
> -{
> -	fill_px(pd, scratch->encode);
> -	memset_p(pd->entry, scratch, 512);
> -}
> -
>  static inline void
>  write_dma_entry(struct i915_page_dma * const pdma,
> -		const unsigned short pde,
> +		const unsigned short idx,
>  		const u64 encoded_entry)
>  {
>  	u64 * const vaddr = kmap_atomic(pdma->page);
>
> -	vaddr[pde] = encoded_entry;
> +	vaddr[idx] = encoded_entry;
>  	kunmap_atomic(vaddr);
>  }
>
>  static inline void
>  __set_pd_entry(struct i915_page_directory * const pd,
> -	       const unsigned short pde,
> +	       const unsigned short idx,

My excuse was that as it is a pd, "pde" fit. Considering that it is now
used at any level, and that "pde" in bspec is reserved for the last
level, "idx" is the better name.
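As an aside, going back up to the new fill32_px: the low-dword
duplication is easy to sanity check in isolation. A throwaway userspace
sketch (my names, not the driver's):

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-in for the kernel's lower_32_bits(). */
	static inline uint64_t lower_32(uint64_t v)
	{
		return v & 0xffffffffu;
	}

	int main(void)
	{
		uint32_t pte = 0xdeadbeef;	/* illustrative 32-bit entry */
		uint64_t vv = lower_32(pte);
		uint64_t fill = vv << 32 | vv;	/* the pattern fill32_px builds */

		/* prints deadbeefdeadbeef */
		printf("fill = %016llx\n", (unsigned long long)fill);
		return 0;
	}

Same fill pattern the old fill_page_dma_32() produced, so no functional
change there.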
>  	       struct i915_page_dma * const to,
>  	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
>
>  	atomic_inc(px_used(pd));
> -	pd->entry[pde] = to;
> -	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
> +	pd->entry[idx] = to;
> +	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
>  }
>
> -#define set_pd_entry(pd, pde, to) \
> -	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
> +#define set_pd_entry(pd, idx, to) \
> +	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
>
>  static inline void
>  clear_pd_entry(struct i915_page_directory * const pd,
> -	       const unsigned short pde,
> -	       struct i915_page_scratch * const scratch)
> +	       const unsigned short idx,
> +	       const struct i915_page_scratch * const scratch)
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
>
> -	write_dma_entry(px_base(pd), pde, scratch->encode);
> -	pd->entry[pde] = scratch;
> +	write_dma_entry(px_base(pd), idx, scratch->encode);
> +	pd->entry[idx] = NULL;
>  	atomic_dec(px_used(pd));
>  }
>
>  static bool
>  release_pd_entry(struct i915_page_directory * const pd,
> -		 const unsigned short pde,
> +		 const unsigned short idx,
>  		 struct i915_page_table * const pt,
> -		 struct i915_page_scratch * const scratch)
> +		 const struct i915_page_scratch * const scratch)
>  {
>  	bool free = false;
>
>  	spin_lock(&pd->lock);
>  	if (atomic_dec_and_test(&pt->used)) {
> -		clear_pd_entry(pd, pde, scratch);
> +		clear_pd_entry(pd, idx, scratch);
>  		free = true;
>  	}
>  	spin_unlock(&pd->lock);
> @@ -910,7 +899,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
>  	int i;
>
>  	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i] != &vm->scratch[1])
> +		if (pd->entry[i])
>  			free_pd(vm, pd->entry[i]);
>  	}
>  }
> @@ -922,7 +911,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  	int i;
>
>  	for (i = 0; i < pdpes; i++) {
> -		if (pdp->entry[i] == &vm->scratch[2])
> +		if (!pdp->entry[i])
>  			continue;
>
>  		gen8_free_page_tables(vm, pdp->entry[i]);
> @@ -940,7 +929,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
>  	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
>  		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
>
> -		if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3]))
> +		if (!pdp)
>  			continue;
>
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> @@ -994,8 +983,6 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  	u32 pde;
>
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
> -

A NULL oops is at least equally descriptive.
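And to spell out the shape of the win with a toy (types and names are
mine, purely illustrative, not the driver's):

	struct toy_pd { void *entry[512]; };

	/* Before: every walker had to be handed the per-level scratch
	 * pointer just to recognise an empty slot. */
	static int count_live_before(const struct toy_pd *pd,
				     const void *scratch)
	{
		int i, n = 0;

		for (i = 0; i < 512; i++)
			if (pd->entry[i] != scratch)
				n++;
		return n;
	}

	/* After: an empty slot is NULL at every level, so the walk
	 * needs no level-specific knowledge at all. */
	static int count_live_after(const struct toy_pd *pd)
	{
		int i, n = 0;

		for (i = 0; i < 512; i++)
			if (pd->entry[i])
				n++;
		return n;
	}

One less parameter to thread through every level, and a stray walk into
an empty slot now oopses on NULL instead of quietly chasing scratch.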
>  		atomic_inc(&pt->used);
>  		gen8_ppgtt_clear_pt(vm, pt, start, length);
>  		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
> @@ -1014,8 +1001,6 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	unsigned int pdpe;
>
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2]));
> -
>  		atomic_inc(px_used(pd));
>  		gen8_ppgtt_clear_pd(vm, pd, start, length);
>  		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
> @@ -1044,8 +1029,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
>
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3]));
> -
>  		atomic_inc(px_used(pdp));
>  		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
>  		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
> @@ -1066,7 +1049,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>
> -		if (px_base(pt) == px_base(&vm->scratch[1])) {
> +		if (!pt) {
>  			spin_unlock(&pd->lock);
>
>  			pt = fetch_and_zero(&alloc);
> @@ -1081,7 +1064,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  				fill_px(pt, vm->scratch[0].encode);
>
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == &vm->scratch[1]) {
> +			if (!pd->entry[pde]) {
>  				set_pd_entry(pd, pde, pt);
>  			} else {
>  				alloc = pt;
> @@ -1113,7 +1096,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (px_base(pd) == px_base(&vm->scratch[2])) {
> +		if (!pd) {
>  			spin_unlock(&pdp->lock);
>
>  			pd = fetch_and_zero(&alloc);
> @@ -1124,10 +1107,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>
> -			init_pd(pd, &vm->scratch[1]);
> +			fill_px(pd, vm->scratch[1].encode);
>
>  			spin_lock(&pdp->lock);
> -			if (pdp->entry[pdpe] == &vm->scratch[2]) {
> +			if (!pdp->entry[pdpe]) {
>  				set_pd_entry(pdp, pdpe, pd);
>  			} else {
>  				alloc = pd;
> @@ -1177,7 +1160,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (px_base(pdp) == px_base(&vm->scratch[3])) {
> +		if (!pdp) {
>  			spin_unlock(&pml4->lock);
>
>  			pdp = fetch_and_zero(&alloc);
> @@ -1188,10 +1171,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>
> -			init_pd(pdp, &vm->scratch[2]);
> +			fill_px(pdp, vm->scratch[2].encode);
>
>  			spin_lock(&pml4->lock);
> -			if (pml4->entry[pml4e] == &vm->scratch[3]) {
> +			if (!pml4->entry[pml4e]) {
>  				set_pd_entry(pml4, pml4e, pdp);
>  			} else {
>  				alloc = pdp;
> @@ -1527,7 +1510,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  		if (IS_ERR(pd))
>  			goto unwind;
>
> -		init_pd(pd, &vm->scratch[1]);
> +		fill_px(pd, vm->scratch[1].encode);
>  		set_pd_entry(pdp, pdpe, pd);
>  	}
>
> @@ -1558,46 +1541,19 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>  	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>  }
>
> -static void init_pd_n(struct i915_address_space *vm,
> -		      struct i915_page_directory *pd,
> -		      struct i915_page_scratch *scratch,
> -		      const unsigned int entries)
> -{
> -	u64 * const vaddr = kmap_atomic_px(pd);
> -
> -	memset64(vaddr, scratch->encode, entries);
> -	kunmap_atomic(vaddr);
> -
> -	memset_p(pd->entry, scratch, entries);
> -}
> -
>  static struct i915_page_directory *
>  gen8_alloc_top_pd(struct i915_address_space *vm)
>  {
> +	const unsigned int count = vm->total >> __gen8_pte_shift(vm->top);

Nice. If I read it right, a 4lvl vm (top == 3, so a shift of 39 against
a 1 << 48 total) gives count == 512, and a 3lvl vm (top == 2, shift of
30 against 1 << 32) gives count == 4, i.e. GEN8_3LVL_PDPES.

>  	struct i915_page_directory *pd;
>
> -	if (i915_vm_is_4lvl(vm)) {
> -		pd = alloc_pd(vm);
> -		if (!IS_ERR(pd))
> -			init_pd(pd, &vm->scratch[3]);
> +	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));

I don't have to think. I like that.

>
> +	pd = alloc_pd(vm);
> +	if (IS_ERR(pd))
>  		return pd;
> -	}
> -
> -	/* 3lvl */
> -	pd = __alloc_pd();
> -	if (!pd)
> -		return ERR_PTR(-ENOMEM);
> -
> -	pd->entry[GEN8_3LVL_PDPES] = NULL;

Ok, you don't like the sentinel. Perhaps you could write a few soothing
words about how noisily we crash if we run long on this runway. If the
tower sees it and sends the firetrucks, it is fine.

> -
> -	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
> -		kfree(pd);
> -		return ERR_PTR(-ENOMEM);
> -	}
> -
> -	init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES);
>
> +	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
>  	return pd;
>  }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 669b204d4c13..2341944b9b17 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -610,7 +610,7 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
>  {
>  	struct i915_page_dma *pt = ppgtt->pd->entry[n];
>
> -	return px_dma(pt);
> +	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));

No other external users. Earlier in the series I yearned a little for an
is_entry_available|free|empty() helper, but comparing against NULL beats
any of them.

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>

>  }
>
>  static inline struct i915_ggtt *
> --
> 2.20.1