On 25 July 2017 at 20:53, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > Quoting Matthew Auld (2017-07-25 20:21:23) >> Support inserting 1G gtt pages into the 48b PPGTT. >> >> v2: sanity check sg->length against page_size >> >> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx> >> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> >> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> --- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 73 +++++++++++++++++++++++++++++++++++-- >> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + >> 2 files changed, 71 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index 385cd85f47bb..acd0c0d1ba8d 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -945,6 +945,66 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, >> cache_level); >> } >> >> +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, >> + struct i915_page_directory_pointer **pdps, >> + struct sgt_dma *iter, >> + enum i915_cache_level cache_level) >> +{ >> + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); >> + u64 start = vma->node.start; >> + >> + do { >> + struct gen8_insert_pte idx = gen8_insert_pte(start); >> + struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; >> + struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; >> + struct i915_page_table *pt = pd->page_table[idx.pde]; >> + dma_addr_t rem = iter->max - iter->dma; > > We don't need to recalculate rem each loop, it dwindles until we advance > the iter and then we reset rem = iter->sg->length. > >> + unsigned int page_size; >> + gen8_pte_t encode = pte_encode; >> + gen8_pte_t *vaddr; >> + u16 index, max; >> + >> + if (unlikely(vma->page_sizes.sg & I915_GTT_PAGE_SIZE_1G) && >> + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) && >> + rem >= I915_GTT_PAGE_SIZE_1G && !(idx.pte | idx.pde)) { >> + vaddr = kmap_atomic_px(pdp); >> + index = idx.pdpe; >> + max = GEN8_PML4ES_PER_PML4; >> + page_size = I915_GTT_PAGE_SIZE_1G; >> + encode |= GEN8_PDPE_PS_1G; >> + } else { >> + vaddr = kmap_atomic_px(pt); >> + index = idx.pte; >> + max = GEN8_PTES; >> + page_size = I915_GTT_PAGE_SIZE; >> + } >> + >> + do { >> + GEM_BUG_ON(iter->sg->length < page_size); >> + vaddr[index++] = encode | iter->dma; >> + >> + start += page_size; >> + iter->dma += page_size; > > rem -= page_size; > >> + if (iter->dma >= iter->max) { >> + iter->sg = __sg_next(iter->sg); >> + if (!iter->sg) >> + break; >> + > rem = iter->sg->length; >> + iter->dma = sg_dma_address(iter->sg); >> + iter->max = iter->dma + iter->sg->length; > iter->max = iter->dma + rem; > > if (rem < page_size) > break; >> + >> + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) >> + break; >> + } >> + rem = iter->max - iter->dma; >> + >> + } while (rem >= page_size && index < max); > > Check against rem is now redundant. What about an sg entry which spans multiple page sizes? Maybe I'm being overly paranoid, but my thinking was that shmem could potentially to give us two continuous 2M pages, for an object of size 2M + 4K, which after coalescing would give us a single sg entry. I'll try to address the other comments, thanks. > > Then to review the impact upon later patches. > -Chris > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx