Support inserting 64K pages into the 48b PPGTT. v2: check for 64K scratch v3: we should only have to re-adjust maybe_64K at every sg interval Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 7 +++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1200afa38bb1..48250ecabbf4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1021,6 +1021,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; unsigned int page_size; + bool maybe_64K = false; gen8_pte_t encode = pte_encode; gen8_pte_t *vaddr; u16 index, max; @@ -1052,6 +1053,13 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, max = GEN8_PTES; page_size = I915_GTT_PAGE_SIZE; + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT)) + maybe_64K = true; + vaddr = kmap_atomic_px(pt); } @@ -1071,6 +1079,12 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, iter->dma = sg_dma_address(iter->sg); iter->max = iter->dma + rem; + if (maybe_64K && index < max && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT))) + maybe_64K = false; + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) break; } @@ -1079,6 +1093,22 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, kunmap_atomic(vaddr); + /* Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K && + (index == max || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + } + } while (iter->sg); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 356fec26e8c9..22b8fd233f30 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -149,6 +149,7 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) #define GEN8_PDPE_PS_1G BIT(7) @@ -349,6 +350,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a -- 2.13.5 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx