Support inserting 64K pages into the 48b PPGTT. v2: check for 64K scratch v3: we should only have to re-adjust maybe_64K at every sg interval Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 7 +++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 86e9b6c3a987..5b6829990657 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1072,6 +1072,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; unsigned int page_size; + bool maybe_64K = false; gen8_pte_t encode = pte_encode; gen8_pte_t *vaddr; u16 index, max; @@ -1093,6 +1094,13 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, max = GEN8_PTES; page_size = I915_GTT_PAGE_SIZE; + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT)) + maybe_64K = true; + vaddr = kmap_atomic_px(pt); } @@ -1112,12 +1120,35 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, iter->dma = sg_dma_address(iter->sg); iter->max = iter->dma + rem; + if (maybe_64K && index < max && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT))) + maybe_64K = false; + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) break; } } while (rem >= page_size && index < max); kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K && + (index == max || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + } } while (iter->sg); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0a31dc369c28..475e4cf042be 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -153,6 +153,7 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) +#define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) struct sg_table; @@ -351,6 +352,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a -- 2.13.5 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx