If we set the IPS bit, aka PDE[11], then every 16th entry should be used to index; the HW makes no assumptions about any other PTEs. Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 74 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + 2 files changed, 76 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3be3cbfb6d28..874854e77247 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -854,6 +854,77 @@ static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) } static __always_inline bool +gen8_ppgtt_insert_64K_pte_entries(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + struct sgt_dma *iter, + struct gen8_insert_pte *idx, + enum i915_cache_level cache_level) +{ + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + gen8_pte_t *vaddr; + bool ret; + + /* Currently 64K objects should be aligned to 2M to prevent mixing 4K + * and 64K pte's in the same page-table. 
+ */ + GEM_BUG_ON(idx->pte); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + pd = pdp->page_directory[idx->pdpe]; + + vaddr = kmap_atomic_px(pd); + vaddr[idx->pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + do { + vaddr[idx->pte] = pte_encode | iter->dma; + iter->dma += I915_GTT_PAGE_SIZE_64K; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; + } + + idx->pte += 16; + + if (idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; + ret = true; + break; + } + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + pd = pdp->page_directory[idx->pdpe]; + } + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(pd); + vaddr[idx->pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + } + } while (1); + kunmap_atomic(vaddr); + + mark_tlbs_dirty(ppgtt); + + return ret; +} + +static __always_inline bool gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, struct sgt_dma *iter, @@ -954,6 +1025,9 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, case I915_GTT_PAGE_SIZE_4K: insert_entries = gen8_ppgtt_insert_pte_entries; break; + case I915_GTT_PAGE_SIZE_64K: + insert_entries = gen8_ppgtt_insert_64K_pte_entries; + break; default: MISSING_CASE(page_size); return; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5a2a3907d266..04d37c62c3ef 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -149,6 +149,8 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN8_PDE_IPS_64K BIT(11) + 
struct sg_table; struct intel_rotation_info { -- 2.9.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx