On 6 June 2018 at 07:51, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> Currently all page directories are bound at creation using an
> unevictable node in the GGTT. This severely limits us as we cannot
> remove any inactive ppgtt for new contexts, or under aperture pressure.
> To fix this we need to make the page directory into a first class and
> unbindable vma. Hence, the creation of a custom vma to wrap the page
> directory as opposed to a GEM object.
>
> In this patch, we leave the page directories pinned upon creation.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> Cc: Matthew Auld <matthew.william.auld@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 261 ++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
>  drivers/gpu/drm/i915/i915_vma.h     |   7 +
>  3 files changed, 159 insertions(+), 111 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ea27f799101f..60b5966360b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1640,50 +1640,55 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
>  {
>  	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
>  	struct i915_address_space *vm = &base->vm;
> -	struct i915_page_table *unused;
> -	gen6_pte_t scratch_pte;
> -	u32 pd_entry, pte, pde;
> -
> -	scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
> -
> -	gen6_for_all_pdes(unused, &base->pd, pde) {
> -		u32 expected;
> -		gen6_pte_t *pt_vaddr;
> -		const dma_addr_t pt_addr = px_dma(base->pd.page_table[pde]);
> -		pd_entry = readl(ppgtt->pd_addr + pde);
> -		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
> -
> -		if (pd_entry != expected)
> -			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
> -				   pde,
> -				   pd_entry,
> -				   expected);
> -		seq_printf(m, "\tPDE: %x\n", pd_entry);
> -
> -		pt_vaddr = kmap_atomic_px(base->pd.page_table[pde]);
> -
> -		for (pte = 0; pte < GEN6_PTES; pte+=4) {
> -			unsigned long va =
> -				(pde * PAGE_SIZE * GEN6_PTES) +
> -				(pte * PAGE_SIZE);
> +	const gen6_pte_t scratch_pte =
> +		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
> +	struct i915_page_table *pt;
> +	u32 pte, pde;
> +
> +	gen6_for_all_pdes(pt, &base->pd, pde) {
> +		gen6_pte_t *vaddr;
> +
> +		if (pt == base->vm.scratch_pt)
> +			continue;
> +
> +		if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
> +			u32 expected =
> +				GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
> +				GEN6_PDE_VALID;
> +			u32 pd_entry = readl(ppgtt->pd_addr + pde);
> +
> +			if (pd_entry != expected)
> +				seq_printf(m,
> +					   "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
> +					   pde,
> +					   pd_entry,
> +					   expected);
> +
> +			seq_printf(m, "\tPDE: %x\n", pd_entry);
> +		}
> +
> +		vaddr = kmap_atomic_px(base->pd.page_table[pde]);
> +		for (pte = 0; pte < GEN6_PTES; pte += 4) {
>  			int i;
> -			bool found = false;
> +
>  			for (i = 0; i < 4; i++)
> -				if (pt_vaddr[pte + i] != scratch_pte)
> -					found = true;
> -			if (!found)
> +				if (vaddr[pte + i] != scratch_pte)
> +					break;
> +			if (i == 4)
>  				continue;
>
> -			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
> +			seq_printf(m, "\t\t(%03d, %04d) %08lx: ",
> +				   pde, pte,
> +				   (pde * GEN6_PTES + pte) * PAGE_SIZE);
>  			for (i = 0; i < 4; i++) {
> -				if (pt_vaddr[pte + i] != scratch_pte)
> -					seq_printf(m, " %08x", pt_vaddr[pte + i]);
> +				if (vaddr[pte + i] != scratch_pte)
> +					seq_printf(m, " %08x", vaddr[pte + i]);
>  				else
> -					seq_puts(m, "  SCRATCH ");
> +					seq_puts(m, "  SCRATCH");
>  			}
>  			seq_puts(m, "\n");
>  		}
> -		kunmap_atomic(pt_vaddr);
> +		kunmap_atomic(vaddr);
>  	}
>  }
>
> @@ -1697,22 +1702,6 @@ static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
>  		  ppgtt->pd_addr + pde);
>  }
>
> -/* Write all the page tables found in the ppgtt structure to incrementing page
> - * directories. */
> -static void gen6_write_page_range(struct i915_hw_ppgtt *base,
> -				  u32 start, u32 length)
> -{
> -	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> -	struct i915_page_table *pt;
> -	unsigned int pde;
> -
> -	gen6_for_each_pde(pt, &base->pd, start, length, pde)
> -		gen6_write_pde(ppgtt, pde, pt);
> -
> -	mark_tlbs_dirty(base);
> -	gen6_ggtt_invalidate(base->vm.i915);
> -}
> -
>  static inline u32 get_pd_offset(struct gen6_hw_ppgtt *ppgtt)
>  {
>  	GEM_BUG_ON(ppgtt->base.pd.base.ggtt_offset & 0x3f);
> @@ -1919,8 +1908,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>
>  			gen6_initialize_pt(vm, pt);
>  			ppgtt->base.pd.page_table[pde] = pt;
> -			gen6_write_pde(ppgtt, pde, pt);
> -			flush = true;
> +
> +			if (i915_vma_is_bound(ppgtt->vma,
> +					      I915_VMA_GLOBAL_BIND)) {
> +				gen6_write_pde(ppgtt, pde, pt);
> +				flush = true;
> +			}
>  		}
>  	}
>
> @@ -1936,8 +1929,11 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  	return -ENOMEM;
>  }
>
> -static int gen6_ppgtt_init_scratch(struct i915_address_space *vm)
> +static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
>  {
> +	struct i915_address_space * const vm = &ppgtt->base.vm;
> +	struct i915_page_table *unused;
> +	u32 pde;
>  	int ret;
>
>  	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
> @@ -1951,6 +1947,8 @@ static int gen6_ppgtt_init_scratch(struct i915_address_space *vm)
>  	}
>
>  	gen6_initialize_pt(vm, vm->scratch_pt);
> +	gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
> +		ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
>
>  	return 0;
>  }
> @@ -1975,52 +1973,110 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>  	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
>
> -	drm_mm_remove_node(&ppgtt->node);
> +	i915_vma_unpin(ppgtt->vma);
> +	i915_vma_destroy(ppgtt->vma);
>
>  	gen6_ppgtt_free_pd(ppgtt);
>  	gen6_ppgtt_free_scratch(vm);
>  }
>
> -static int gen6_ppgtt_allocate_page_directories(struct gen6_hw_ppgtt *ppgtt)
> +static int pd_vma_set_pages(struct i915_vma *vma)
>  {
> -	struct drm_i915_private *dev_priv = ppgtt->base.vm.i915;
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	int err;
> +	vma->pages = ERR_PTR(-ENODEV);
> +	return 0;
> +}
>
> -	/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> -	 * allocator works in address space sizes, so it's multiplied by page
> -	 * size. We allocate at the top of the GTT to avoid fragmentation.
> -	 */
> -	BUG_ON(!drm_mm_initialized(&ggtt->vm.mm));
> +static void pd_vma_clear_pages(struct i915_vma *vma)
> +{
> +	GEM_BUG_ON(!vma->pages);
>
> -	err = i915_gem_gtt_insert(&ggtt->vm, &ppgtt->node,
> -				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -				  I915_COLOR_UNEVICTABLE,
> -				  0, ggtt->vm.total,
> -				  PIN_HIGH);
> -	if (err)
> -		return err;
> +	vma->pages = NULL;
> +}
>
> -	if (ppgtt->node.start < ggtt->mappable_end)
> -		DRM_DEBUG("Forced to use aperture for PDEs\n");
> +static int pd_vma_bind(struct i915_vma *vma,
> +		       enum i915_cache_level cache_level,
> +		       u32 unused)
> +{
> +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
> +	struct gen6_hw_ppgtt *ppgtt = vma->private;
> +	u32 ggtt_offset = i915_ggtt_offset(vma) / PAGE_SIZE;
> +	struct i915_page_table *pt;
> +	unsigned int pde;
>
> -	ppgtt->base.pd.base.ggtt_offset =
> -		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
> +	ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
> +	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
>
> -	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
> -		ppgtt->base.pd.base.ggtt_offset / sizeof(gen6_pte_t);
> +	gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
> +		gen6_write_pde(ppgtt, pde, pt);
> +
> +	mark_tlbs_dirty(&ppgtt->base);
> +	gen6_ggtt_invalidate(ppgtt->base.vm.i915);
>
>  	return 0;
>  }
>
> -static void gen6_scratch_va_range(struct gen6_hw_ppgtt *ppgtt,
> -				  u64 start, u64 length)
> +static void pd_vma_unbind(struct i915_vma *vma)
>  {
> -	struct i915_page_table *unused;
> -	u32 pde;
> +}
> +
> +static const struct i915_vma_ops pd_vma_ops = {
> +	.set_pages = pd_vma_set_pages,
> +	.clear_pages = pd_vma_clear_pages,
> +	.bind_vma = pd_vma_bind,
> +	.unbind_vma = pd_vma_unbind,
> +};
> +
> +static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> +{
> +	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
> +	struct i915_ggtt *ggtt = &i915->ggtt;
> +	struct i915_vma *vma;
> +	int i;
>
> -	gen6_for_each_pde(unused, &ppgtt->base.pd, start, length, pde)
> -		ppgtt->base.pd.page_table[pde] = ppgtt->base.vm.scratch_pt;
> +	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> +	GEM_BUG_ON(size > ggtt->vm.total);
> +
> +	vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
> +	if (!vma)
> +		return ERR_PTR(-ENOMEM);
> +
> +	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> +		init_request_active(&vma->last_read[i], NULL);
> +	init_request_active(&vma->last_fence, NULL);
> +
> +	vma->vm = &ggtt->vm;
> +	vma->ops = &pd_vma_ops;
> +	vma->private = ppgtt;
> +
> +	vma->obj = NULL;
> +	vma->resv = NULL;
> +	vma->size = size;
> +	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
> +
> +	vma->fence_size = size;
> +	vma->fence_alignment = I915_GTT_MIN_ALIGNMENT;
> +
> +	vma->flags |= I915_VMA_GGTT;
> +	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
> +
> +	INIT_LIST_HEAD(&vma->obj_link);
> +	list_add(&vma->vm_link, &vma->vm->unbound_list);
> +
> +	return vma;
> +}
> +
> +static int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
> +{
> +	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> +
> +	/*
> +	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> +	 * allocator works in address space sizes, so it's multiplied by page
> +	 * size. We allocate at the top of the GTT to avoid fragmentation.
> +	 */
> +	return i915_vma_pin(ppgtt->vma,
> +			    0, GEN6_PD_ALIGN,
> +			    PIN_GLOBAL | PIN_HIGH);
>  }
>
>  static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
> @@ -2058,24 +2114,25 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>  	else
>  		BUG();
>
> -	err = gen6_ppgtt_init_scratch(&ppgtt->base.vm);
> +	err = gen6_ppgtt_init_scratch(ppgtt);
>  	if (err)
>  		goto err_free;
>
> -	err = gen6_ppgtt_allocate_page_directories(ppgtt);
> -	if (err)
> +	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
> +	if (IS_ERR(ppgtt->vma))

err = PTR_ERR()

Reviewed-by: Matthew Auld <matthew.william.auld@xxxxxxxxx>
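For clarity on the "err = PTR_ERR()" note above: the IS_ERR() branch needs to record why pd_vma_create() failed before taking the error path, so the caller gets the real errno back instead of a stale err value. A minimal sketch of the shape being suggested (the err_scratch label is an assumption here, since the unwind path is cut off in the quoted hunk):

	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
	if (IS_ERR(ppgtt->vma)) {
		/* propagate the real error code from vma creation */
		err = PTR_ERR(ppgtt->vma);
		/* assumed unwind label: undo gen6_ppgtt_init_scratch() */
		goto err_scratch;
	}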