It appears that, now that we have the ring TLB invalidation in place, we need only update the page directory cachelines that we have altered. This is a great reduction from rewriting the whole 2MiB ppgtt on every update. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4da1dfe11007..6a2183442fed 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1693,15 +1693,18 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } -static void gen6_flush_pd(struct gen6_ppgtt *ppgtt) +static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) { struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *pt; unsigned int pde; + start = round_down(start, SZ_64K); + end = round_up(end, SZ_64K) - start; + mutex_lock(&ppgtt->flush); - gen6_for_all_pdes(pt, pd, pde) + gen6_for_each_pde(pt, pd, start, end, pde) gen6_write_pde(ppgtt, pde, pt); ioread32(ppgtt->pd_addr + pde - 1); @@ -1754,8 +1757,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, spin_unlock(&pd->lock); if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) - /* Rewrite them all! Anything less misses an invalidate. */ - gen6_flush_pd(ppgtt); + gen6_flush_pd(ppgtt, from, start); goto out; @@ -1844,7 +1846,7 @@ static int pd_vma_bind(struct i915_vma *vma, px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; - gen6_flush_pd(ppgtt); + gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); return 0; } -- 2.24.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx