Quoting Matthew Auld (2017-07-25 20:21:23)
> Support inserting 1G gtt pages into the 48b PPGTT.
>
> v2: sanity check sg->length against page_size
>
> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 73 +++++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +
>  2 files changed, 71 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 385cd85f47bb..acd0c0d1ba8d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -945,6 +945,66 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
>  			      cache_level);
>  }
>
> +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
> +					   struct i915_page_directory_pointer **pdps,
> +					   struct sgt_dma *iter,
> +					   enum i915_cache_level cache_level)
> +{
> +	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
> +	u64 start = vma->node.start;
> +
> +	do {
> +		struct gen8_insert_pte idx = gen8_insert_pte(start);
> +		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
> +		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
> +		struct i915_page_table *pt = pd->page_table[idx.pde];
> +		dma_addr_t rem = iter->max - iter->dma;
> +		unsigned int page_size;
> +		gen8_pte_t encode = pte_encode;
> +		gen8_pte_t *vaddr;
> +		u16 index, max;
> +
> +		if (unlikely(vma->page_sizes.sg & I915_GTT_PAGE_SIZE_1G) &&
> +		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
> +		    rem >= I915_GTT_PAGE_SIZE_1G && !(idx.pte | idx.pde)) {
> +			vaddr = kmap_atomic_px(pdp);
> +			index = idx.pdpe;
> +			max = GEN8_PML4ES_PER_PML4;
> +			page_size = I915_GTT_PAGE_SIZE_1G;
> +			encode |= GEN8_PDPE_PS_1G;
> +		} else {
> +			vaddr = kmap_atomic_px(pt);
> +			index = idx.pte;
> +			max = GEN8_PTES;
> +			page_size = I915_GTT_PAGE_SIZE;
> +		}
> +
> +		do {
> +			GEM_BUG_ON(iter->sg->length < page_size);
> +			vaddr[index++] = encode | iter->dma;
> +
> +			start += page_size;
> +			iter->dma += page_size;
> +			if (iter->dma >= iter->max) {
> +				iter->sg = __sg_next(iter->sg);
> +				if (!iter->sg)
> +					break;
> +
> +				iter->dma = sg_dma_address(iter->sg);
> +				iter->max = iter->dma + iter->sg->length;
> +
> +				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
> +					break;
> +			}
> +			rem = iter->max - iter->dma;
> +
> +		} while (rem >= page_size && index < max);
> +
> +		kunmap_atomic(vaddr);
> +
> +	} while (iter->sg);
> +}
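An aside, purely for illustration and not part of the patch: the
!(idx.pte | idx.pde) gate relies on the usual gen8 level arithmetic -- 512
entries per level on top of 4K pages, so one PDE spans 2M and one PDPE spans
1G, and a zero pte/pde index for a page-aligned offset means the GTT address
sits on a 1G boundary. A self-contained sketch of that arithmetic, with every
name below local to the sketch rather than taken from i915:

#include <assert.h>
#include <stdint.h>

#define LEVEL_ENTRIES	512				/* entries per PT/PD/PDP/PML4 */
#define PTE_SPAN	(1ull << 12)			/* 4K mapped by one PTE */
#define PDE_SPAN	(LEVEL_ENTRIES * PTE_SPAN)	/* 2M mapped by one PDE */
#define PDPE_SPAN	(LEVEL_ENTRIES * PDE_SPAN)	/* 1G mapped by one PDPE */

int main(void)
{
	uint64_t start = 3ull << 30;	/* some 4K-aligned GTT offset */
	unsigned int pte = (start >> 12) & (LEVEL_ENTRIES - 1);
	unsigned int pde = (start >> 21) & (LEVEL_ENTRIES - 1);

	/* for a 4K-aligned start, !(pte | pde) <=> start is 1G aligned */
	assert(PDPE_SPAN == 1ull << 30);
	assert(!(pte | pde) == !(start & (PDPE_SPAN - 1)));
	return 0;
}

So on the GTT side the two zero indices are all the 1G path needs to check;
the remaining conditions are about the dma chunk itself.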
Staring at the final result, I think most importantly we need to break up
index/max/page, encode, vaddr with whitespace:

static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
					   struct i915_page_directory_pointer **pdps,
					   struct sgt_dma *iter,
					   enum i915_cache_level cache_level)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	u64 start = vma->node.start;

	do {
		struct gen8_insert_pte idx = gen8_insert_pte(start);
		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
		struct i915_page_table *pt = pd->page_table[idx.pde];
		dma_addr_t rem = iter->max - iter->dma;
		unsigned int page_size;
		bool maybe_64K = false;
		gen8_pte_t encode = pte_encode;
		gen8_pte_t *vaddr;
		u16 index, max;

		if (!(idx.pte | idx.pde) &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
		    rem >= I915_GTT_PAGE_SIZE_1G) {
			index = idx.pdpe;
			max = GEN8_PML4ES_PER_PML4;
			page_size = I915_GTT_PAGE_SIZE_1G;

			encode |= GEN8_PDPE_PS_1G;

			vaddr = kmap_atomic_px(pdp);
		} else if (!idx.pte &&
			   IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
			   rem >= I915_GTT_PAGE_SIZE_2M) {
			index = idx.pde;
			max = I915_PDES;
			page_size = I915_GTT_PAGE_SIZE_2M;

			encode |= GEN8_PDE_PS_2M;

			vaddr = kmap_atomic_px(pd);
		} else {
			index = idx.pte;
			max = GEN8_PTES;
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (rem >= (max - index) * 4<<10 ||
			     IS_ALIGNED(rem, 64 << 10)))
				maybe_64K = true;

			vaddr = kmap_atomic_px(pt);
		}

		do {
			GEM_BUG_ON(iter->sg->length < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + iter->sg->length;
				rem = iter->max - iter->dma;

				if (maybe_64K && index < max &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (rem >= (max - index) * 4<<10 ||
				       IS_ALIGNED(rem, 64 << 10))))
					maybe_64K = false;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		kunmap_atomic(vaddr);

		/* Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 *
		 * XXX We need 64k scratch to allow index < max
		 */
		if (maybe_64K && index == max) {
			vaddr = kmap_atomic_px(pd);
			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
			kunmap_atomic(vaddr);

			page_size = I915_GTT_PAGE_SIZE_64K;
		}

		vma->page_sizes.gtt |= page_size;
	} while (iter->sg);
}

Please excuse the raw numbers, I was just staring at the maybe_64k problem
trying to find a neater/consistent way of expressing it.
-Chris
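For illustration only, one way those raw numbers could be folded into a single
named predicate -- can_use_64K() here is a hypothetical helper, not part of
either version above; it only uses IS_ALIGNED() and the I915_GTT_PAGE_SIZE*
constants already visible in the code:

static inline bool can_use_64K(dma_addr_t dma, u64 rem, u16 index, u16 max)
{
	/* 64K entries want a 64K-aligned dma address, and either enough left
	 * in this sg chunk to fill the remainder of the page table
	 * (rem >= (max - index) * 4K), or a chunk that ends on a 64K
	 * boundary so the tail is naturally padded.
	 */
	return IS_ALIGNED(dma, I915_GTT_PAGE_SIZE_64K) &&
	       (rem >= (max - index) * I915_GTT_PAGE_SIZE ||
		IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K));
}

With that, the first call site would read maybe_64K = can_use_64K(iter->dma,
rem, index, max); and the re-check when crossing into the next sg chunk
(guarded by index < max) would simply clear maybe_64K when the same predicate
no longer holds.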