On 12/07/2019 14.27, Chris Wilson wrote: > With an explicit level, we can refactor the separate clear functions > as a simple recursive function. The additional knowledge of the level > allows us to spot when we can free an entire subtree at once. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- Reviewed-by: Abdiel Janulgue <abdiel.janulgue@xxxxxxxxxxxxxxx> > drivers/gpu/drm/i915/Kconfig.debug | 15 +++ > drivers/gpu/drm/i915/i915_gem_gtt.c | 154 ++++++++++++++++------------ > 2 files changed, 105 insertions(+), 64 deletions(-) > > diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug > index 8d922bb4d953..ed8c787058a5 100644 > --- a/drivers/gpu/drm/i915/Kconfig.debug > +++ b/drivers/gpu/drm/i915/Kconfig.debug > @@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM > > If in doubt, say "N". > > +config DRM_I915_TRACE_GTT > + bool "Insert extra ftrace output from the GTT internals" > + depends on DRM_I915_DEBUG_GEM > + select TRACING > + default n > + help > + Enable additional and verbose debugging output that will spam > + ordinary tests, but may be vital for post-mortem debugging when > + used with /proc/sys/kernel/ftrace_dump_on_oops > + > + Recommended for driver developers only. > + > + If in doubt, say "N". > + > + > config DRM_I915_SW_FENCE_DEBUG_OBJECTS > bool "Enable additional driver debugging for fence objects" > depends on DRM_I915 > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 305c65c08a6a..7b2f3188d435 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -46,6 +46,12 @@ > > #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) > > +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) > +#define DBG(...) trace_printk(__VA_ARGS__) > +#else > +#define DBG(...) > +#endif > + > /** > * DOC: Global GTT views > * > @@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd, > { > bool free = false; > > + if (atomic_add_unless(&pt->used, -1, 1)) > + return false; > + > spin_lock(&pd->lock); > if (atomic_dec_and_test(&pt->used)) { > clear_pd_entry(pd, idx, scratch); > @@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > free_scratch(vm); > } > > -/* Removes entries from a single page table, releasing it if it's empty. > - * Caller can use the return value to update higher-level entries. > - */ > -static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm, > - struct i915_page_table *pt, > - u64 start, u64 length) > +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, > + struct i915_page_directory * const pd, > + u64 start, const u64 end, int lvl) > { > - const unsigned int num_entries = gen8_pte_count(start, length); > - gen8_pte_t *vaddr; > + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; > + unsigned int idx, len; > > - vaddr = kmap_atomic_px(pt); > - memset64(vaddr + gen8_pte_index(start), > - vm->scratch[0].encode, > - num_entries); > - kunmap_atomic(vaddr); > + len = gen8_pd_range(start, end, lvl--, &idx); > + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n", > + __func__, vm, lvl + 1, start, end, > + idx, len, atomic_read(px_used(pd))); > + GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); > > - GEM_BUG_ON(num_entries > atomic_read(&pt->used)); > + do { > + struct i915_page_table *pt = pd->entry[idx]; > + > + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && > + gen8_pd_contains(start, end, lvl)) { > + DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", > + __func__, vm, lvl + 1, idx, start, end); > + clear_pd_entry(pd, idx, scratch); > + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); > + start += (u64)I915_PDES << gen8_pd_shift(lvl); > + continue; > + } > > - atomic_sub(num_entries, &pt->used); > -} > + if (lvl) { > + start = __gen8_ppgtt_clear(vm, as_pd(pt), > + start, end, lvl); > + } else { > + unsigned int count; > + u64 *vaddr; > > -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, > - struct i915_page_directory *pd, > - u64 start, u64 length) > -{ > - struct i915_page_table *pt; > - u32 pde; > + count = gen8_pt_count(start, end); > + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n", > + __func__, vm, lvl, start, end, > + gen8_pd_index(start, 0), count, > + atomic_read(&pt->used)); > + GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); > > - gen8_for_each_pde(pt, pd, start, length, pde) { > - atomic_inc(&pt->used); > - gen8_ppgtt_clear_pt(vm, pt, start, length); > - if (release_pd_entry(pd, pde, pt, &vm->scratch[1])) > + vaddr = kmap_atomic_px(pt); > + memset64(vaddr + gen8_pd_index(start, 0), > + vm->scratch[0].encode, > + count); > + kunmap_atomic(vaddr); > + > + atomic_sub(count, &pt->used); > + start += count; > + } > + > + if (release_pd_entry(pd, idx, pt, scratch)) > free_px(vm, pt); > - } > + } while (idx++, --len); > + > + return start; > } > > -/* Removes entries from a single page dir pointer, releasing it if it's empty. > - * Caller can use the return value to update higher-level entries > - */ > -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, > - struct i915_page_directory * const pdp, > - u64 start, u64 length) > +static void gen8_ppgtt_clear(struct i915_address_space *vm, > + u64 start, u64 length) > { > - struct i915_page_directory *pd; > - unsigned int pdpe; > + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); > + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); > > - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { > - atomic_inc(px_used(pd)); > - gen8_ppgtt_clear_pd(vm, pd, start, length); > - if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) > - free_px(vm, pd); > - } > + start >>= GEN8_PTE_SHIFT; > + length >>= GEN8_PTE_SHIFT; > + GEM_BUG_ON(length == 0); > + > + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, > + start, start + length, vm->top); > } > > -static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, > - u64 start, u64 length) > +static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, > + struct i915_page_directory *pd, > + u64 start, u64 length) > { > - gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length); > + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); > + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); > + > + start >>= GEN8_PTE_SHIFT; > + length >>= GEN8_PTE_SHIFT; > + > + __gen8_ppgtt_clear(vm, pd, start, start + length, 1); > } > > -/* Removes entries from a single pml4. > - * This is the top-level structure in 4-level page tables used on gen8+. > - * Empty entries are always scratch pml4e. > - */ > -static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, > - u64 start, u64 length) > +static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, > + struct i915_page_directory * const pdp, > + u64 start, u64 length) > { > - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > - struct i915_page_directory * const pml4 = ppgtt->pd; > - struct i915_page_directory *pdp; > - unsigned int pml4e; > + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); > + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); > > - GEM_BUG_ON(!i915_vm_is_4lvl(vm)); > + start >>= GEN8_PTE_SHIFT; > + length >>= GEN8_PTE_SHIFT; > > - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { > - atomic_inc(px_used(pdp)); > - gen8_ppgtt_clear_pdp(vm, pdp, start, length); > - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) > - free_px(vm, pdp); > - } > + __gen8_ppgtt_clear(vm, pdp, start, start + length, 2); > } > > static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, > @@ -1171,7 +1195,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) > free_px(vm, pdp); > unwind: > - gen8_ppgtt_clear_4lvl(vm, from, start - from); > + gen8_ppgtt_clear(vm, from, start - from); > out: > if (alloc) > free_px(vm, alloc); > @@ -1484,6 +1508,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) > > fill_px(pd, vm->scratch[1].encode); > set_pd_entry(pdp, pdpe, pd); > + atomic_inc(px_used(pd)); /* keep pinned */ > } > > return 0; > @@ -1524,6 +1549,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) > } > > fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); > + atomic_inc(px_used(pd)); /* mark as pinned */ > return pd; > } > > @@ -1573,7 +1599,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) > if (i915_vm_is_4lvl(&ppgtt->vm)) { > ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; > ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; > - ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; > } else { > if (intel_vgpu_active(i915)) { > err = gen8_preallocate_top_level_pdp(ppgtt); > @@ -1583,9 +1608,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) > > ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; > ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; > - ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl; > } > > + ppgtt->vm.clear_range = gen8_ppgtt_clear; > + > if (intel_vgpu_active(i915)) > gen8_ppgtt_notify_vgt(ppgtt, true); > > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx