Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Improve the sg iteration and in hte process eliminate a bug in > miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the > full length of the sg table. > > v2: Check for the end of the fourth level page table (the final pdpe) > and move onto the next. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 170 +++++++++++++++++++----------------- > 1 file changed, 91 insertions(+), 79 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index ca1f5fa6984f..ddfb5963f521 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -751,9 +751,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, > unsigned int num_entries = gen8_pte_count(start, length); > unsigned int pte = gen8_pte_index(start); > unsigned int pte_end = pte + num_entries; > - gen8_pte_t *pt_vaddr; > - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, > - I915_CACHE_LLC); > + const gen8_pte_t scratch_pte = > + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); > + gen8_pte_t *vaddr; > > if (WARN_ON(!px_page(pt))) > return false; > @@ -766,12 +766,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, > return true; > } > > - pt_vaddr = kmap_px(pt); > - > + vaddr = kmap_px(pt); > while (pte < pte_end) > - pt_vaddr[pte++] = scratch_pte; > - > - kunmap_px(ppgtt, pt_vaddr); > + vaddr[pte++] = scratch_pte; > + kunmap_px(ppgtt, vaddr); > > return false; > } > @@ -879,71 +877,98 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, > gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); > } > > -static void > -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, > +struct sgt_dma { > + struct scatterlist *sg; > + dma_addr_t dma, max; > +}; > + > +static __always_inline bool > +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, > struct i915_page_directory_pointer *pdp, > - struct sg_page_iter *sg_iter, > - uint64_t start, > + struct sgt_dma *iter, > + u64 start, > enum i915_cache_level cache_level) > { > - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > - gen8_pte_t *pt_vaddr; > - unsigned pdpe = gen8_pdpe_index(start); > - unsigned pde = gen8_pde_index(start); > - unsigned pte = gen8_pte_index(start); > + unsigned int pdpe = gen8_pdpe_index(start); > + unsigned int pde = gen8_pde_index(start); > + unsigned int pte = gen8_pte_index(start); > + struct i915_page_directory *pd; > + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); > + gen8_pte_t *vaddr; > + bool ret; > > - pt_vaddr = NULL; > + pd = pdp->page_directory[pdpe]; > + vaddr = kmap_px(pd->page_table[pde]); > + do { > + vaddr[pte] = pte_encode | iter->dma; > + iter->dma += PAGE_SIZE; > + if (iter->dma >= iter->max) { > + iter->sg = __sg_next(iter->sg); > + if (!iter->sg) { > + ret = false; > + break; > + } > > - while (__sg_page_iter_next(sg_iter)) { > - if (pt_vaddr == NULL) { > - struct i915_page_directory *pd = pdp->page_directory[pdpe]; > - struct i915_page_table *pt = pd->page_table[pde]; > - pt_vaddr = kmap_px(pt); > + iter->dma = sg_dma_address(iter->sg); > + iter->max = iter->dma + iter->sg->length; > } > > - pt_vaddr[pte] = > - gen8_pte_encode(sg_page_iter_dma_address(sg_iter), > - cache_level); > if (++pte == GEN8_PTES) { > - kunmap_px(ppgtt, pt_vaddr); > - pt_vaddr = NULL; > if (++pde == I915_PDES) { > - if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) > + if (++pdpe == GEN8_PML4ES_PER_PML4) { For the next reader, you could add a comment that we run out of scatterlists elements on 32bit ppgtt (pages->sgl) before reaching here. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> -Mika > + ret = true; > break; > + } > + > + pd = pdp->page_directory[pdpe]; > pde = 0; > } > + > + kunmap_px(ppgtt, vaddr); > + vaddr = kmap_px(pd->page_table[pde]); > pte = 0; > } > - } > + } while (1); > + kunmap_px(ppgtt, vaddr); > > - if (pt_vaddr) > - kunmap_px(ppgtt, pt_vaddr); > + return ret; > } > > -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, > - struct sg_table *pages, > - uint64_t start, > - enum i915_cache_level cache_level, > - u32 unused) > +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, > + struct sg_table *pages, > + u64 start, > + enum i915_cache_level cache_level, > + u32 unused) > { > struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > - struct sg_page_iter sg_iter; > + struct sgt_dma iter = { > + .sg = pages->sgl, > + .dma = sg_dma_address(iter.sg), > + .max = iter.dma + iter.sg->length, > + }; > > - __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); > + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, > + start, cache_level); > +} > > - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { > - gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, > - cache_level); > - } else { > - struct i915_page_directory_pointer *pdp; > - uint64_t pml4e; > - uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; > +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, > + struct sg_table *pages, > + uint64_t start, > + enum i915_cache_level cache_level, > + u32 unused) > +{ > + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > + struct sgt_dma iter = { > + .sg = pages->sgl, > + .dma = sg_dma_address(iter.sg), > + .max = iter.dma + iter.sg->length, > + }; > + struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; > + unsigned int pml4e = gen8_pml4e_index(start); > > - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { > - gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, > - start, cache_level); > - } > - } > + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, > + start, cache_level)) > + ; > } > > static void gen8_free_page_tables(struct drm_i915_private *dev_priv, > @@ -1525,8 +1550,8 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) > struct i915_address_space *vm = &ppgtt->base; > uint64_t start = ppgtt->base.start; > uint64_t length = ppgtt->base.total; > - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, > - I915_CACHE_LLC); > + const gen8_pte_t scratch_pte = > + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); > > if (!USES_FULL_48BIT_PPGTT(vm->i915)) { > gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); > @@ -1591,7 +1616,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > ppgtt->base.start = 0; > ppgtt->base.cleanup = gen8_ppgtt_cleanup; > ppgtt->base.allocate_va_range = gen8_alloc_va_range; > - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; > ppgtt->base.clear_range = gen8_ppgtt_clear_range; > ppgtt->base.unbind_vma = ppgtt_unbind_vma; > ppgtt->base.bind_vma = ppgtt_bind_vma; > @@ -1606,6 +1630,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > > ppgtt->base.total = 1ULL << 48; > ppgtt->switch_mm = gen8_48b_mm_switch; > + > + ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; > } else { > ret = __pdp_init(dev_priv, &ppgtt->pdp); > if (ret) > @@ -1622,6 +1648,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > if (ret) > goto free_scratch; > } > + > + ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; > } > > if (intel_vgpu_active(dev_priv)) > @@ -1888,11 +1916,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > } > } > > -struct sgt_dma { > - struct scatterlist *sg; > - dma_addr_t dma, max; > -}; > - > static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > struct sg_table *pages, > uint64_t start, > @@ -2434,26 +2457,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, > struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); > struct sgt_iter sgt_iter; > gen8_pte_t __iomem *gtt_entries; > - gen8_pte_t gtt_entry; > + const gen8_pte_t pte_encode = gen8_pte_encode(0, level); > dma_addr_t addr; > - int i = 0; > > - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); > + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; > + gtt_entries += start >> PAGE_SHIFT; > + for_each_sgt_dma(addr, sgt_iter, st) > + gen8_set_pte(gtt_entries++, pte_encode | addr); > > - for_each_sgt_dma(addr, sgt_iter, st) { > - gtt_entry = gen8_pte_encode(addr, level); > - gen8_set_pte(>t_entries[i++], gtt_entry); > - } > - > - /* > - * XXX: This serves as a posting read to make sure that the PTE has > - * actually been updated. There is some concern that even though > - * registers and PTEs are within the same BAR that they are potentially > - * of NUMA access patterns. Therefore, even with the way we assume > - * hardware should work, we must keep this posting read for paranoia. > - */ > - if (i != 0) > - WARN_ON(readq(>t_entries[i-1]) != gtt_entry); > + wmb(); > > /* This next bit makes the above posting read even more important. We > * want to flush the TLBs only after we're certain all the PTE updates > @@ -2541,7 +2553,9 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, > struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); > unsigned first_entry = start >> PAGE_SHIFT; > unsigned num_entries = length >> PAGE_SHIFT; > - gen8_pte_t scratch_pte, __iomem *gtt_base = > + const gen8_pte_t scratch_pte = > + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); > + gen8_pte_t __iomem *gtt_base = > (gen8_pte_t __iomem *)ggtt->gsm + first_entry; > const int max_entries = ggtt_total_entries(ggtt) - first_entry; > int i; > @@ -2551,8 +2565,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, > first_entry, num_entries, max_entries)) > num_entries = max_entries; > > - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, > - I915_CACHE_LLC); > for (i = 0; i < num_entries; i++) > gen8_set_pte(>t_base[i], scratch_pte); > readl(gtt_base); > -- > 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx