On Fri, Feb 20, 2015 at 11:16 PM, Michel Thierry <michel.thierry@xxxxxxxxx> wrote: > When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map > Level 4 (PML4), before it selects which Page Directory Pointer (PDP) > it will write to. > > Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range. > > Also add a scratch page for PML4. > > This patch was inspired by Ben's "Depend exclusively on map and > unmap_vma". > > Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 66 ++++++++++++++++++++++++++++++------- > drivers/gpu/drm/i915/i915_gem_gtt.h | 12 +++++++ > 2 files changed, 67 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index a1396cb..0954827 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -676,24 +676,52 @@ static void gen8_ppgtt_clear_pte_range(struct i915_page_directory_pointer_entry > } > } > > +static void gen8_ppgtt_clear_range_4lvl(struct i915_hw_ppgtt *ppgtt, > + gen8_gtt_pte_t scratch_pte, > + uint64_t start, > + uint64_t length) > +{ > + struct i915_page_directory_pointer_entry *pdp; > + uint64_t templ4, templ3, pml4e, pdpe; > + > + gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) { > + struct i915_page_directory_entry *pd; > + uint64_t pdp_len = gen8_clamp_pdp(start, length); > + uint64_t pdp_start = start; > + > + gen8_for_each_pdpe(pd, pdp, pdp_start, pdp_len, templ3, pdpe) { The 'gen8_ppgtt_clear_pte_range' function is equipped to switch to a new page directory appropriately. So having just an outer loop of pml4 entries should suffice, with the use of gen8_clamp_pdp. The inner loop 'gen8_for_each_pdpe' is not really needed. > + uint64_t pd_len = gen8_clamp_pd(pdp_start, pdp_len); > + uint64_t pd_start = pdp_start; > + > + gen8_ppgtt_clear_pte_range(pdp, pd_start, pd_len, > + scratch_pte, !HAS_LLC(ppgtt->base.dev)); > + } > + } > +} > + > static void gen8_ppgtt_clear_range(struct i915_address_space *vm, > - uint64_t start, > - uint64_t length, > + uint64_t start, uint64_t length, > bool use_scratch) > { > struct i915_hw_ppgtt *ppgtt = > - container_of(vm, struct i915_hw_ppgtt, base); > - struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */ > - > + container_of(vm, struct i915_hw_ppgtt, base); > gen8_gtt_pte_t scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr, > I915_CACHE_LLC, use_scratch); > > - gen8_ppgtt_clear_pte_range(pdp, start, length, scratch_pte, !HAS_LLC(vm->dev)); > + if (!USES_FULL_48BIT_PPGTT(vm->dev)) { > + struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; > + > + gen8_ppgtt_clear_pte_range(pdp, start, length, scratch_pte, > + !HAS_LLC(ppgtt->base.dev)); > + } else { > + gen8_ppgtt_clear_range_4lvl(ppgtt, scratch_pte, start, length); > + } > } > > static void gen8_ppgtt_insert_pte_entries(struct i915_page_directory_pointer_entry *pdp, > struct sg_page_iter *sg_iter, > uint64_t start, > + size_t pages, > enum i915_cache_level cache_level, > const bool flush) > { > @@ -704,7 +732,7 @@ static void gen8_ppgtt_insert_pte_entries(struct i915_page_directory_pointer_ent > > pt_vaddr = NULL; > > - while (__sg_page_iter_next(sg_iter)) { > + while (pages-- && __sg_page_iter_next(sg_iter)) { > if (pt_vaddr == NULL) { > struct i915_page_directory_entry *pd = pdp->page_directory[pdpe]; > struct i915_page_table_entry *pt = pd->page_tables[pde]; > @@ -742,11 +770,26 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, > u32 unused) > { > struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); > - struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */ > + struct i915_page_directory_pointer_entry *pdp; > struct sg_page_iter sg_iter; > > __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); > - gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start, cache_level, !HAS_LLC(vm->dev)); > + > + if (!USES_FULL_48BIT_PPGTT(vm->dev)) { > + pdp = &ppgtt->pdp; > + gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start, > + sg_nents(pages->sgl), > + cache_level, !HAS_LLC(vm->dev)); > + } else { > + struct i915_pml4 *pml4; > + unsigned pml4e = gen8_pml4e_index(start); > + > + pml4 = &ppgtt->pml4; > + pdp = pml4->pdps[pml4e]; Since an object could get mapped, such that it straddles the pdp boundary & 'gen8_ppgtt_insert_pte_entries' can't switch on its own to a new page directory pointer table (pdp), a modification is required here to have a loop of 'gen8_for_each_pml4e', with the use of 'gen8_clamp_pdp', similar to how gen8_ppgtt_clear_range_4lvl has been implemented. > + gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start, > + sg_nents(pages->sgl), > + cache_level, !HAS_LLC(vm->dev)); > + } > } > > static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde, > @@ -1185,7 +1228,8 @@ static int __gen8_alloc_vma_range_3lvl(struct i915_address_space *vm, > if (sg_iter) { > BUG_ON(!sg_iter->__nents); > gen8_ppgtt_insert_pte_entries(pdp, sg_iter, pd_start, > - flags, !HAS_LLC(vm->dev)); > + gen8_pte_count(pd_start, pd_len), > + flags, !HAS_LLC(vm->dev)); > } > set_bit(pde, pd->used_pdes); > } > @@ -1330,7 +1374,7 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) > if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > int ret = pml4_init(ppgtt); > if (ret) { > - unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > + unmap_and_free_pt(ppgtt->scratch_pml4, ppgtt->base.dev); > return ret; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 1f4cdb1..602d446c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -332,6 +332,7 @@ struct i915_hw_ppgtt { > union { > struct i915_page_table_entry *scratch_pt; > struct i915_page_table_entry *scratch_pd; /* Just need the daddr */ > + struct i915_page_table_entry *scratch_pml4; > }; > > struct drm_i915_file_private *file_priv; > @@ -452,6 +453,17 @@ static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length) > return next_pd - start; > } > > +/* Clamp length to the next page_directory pointer boundary */ > +static inline uint64_t gen8_clamp_pdp(uint64_t start, uint64_t length) > +{ > + uint64_t next_pdp = ALIGN(start + 1, 1ULL << GEN8_PML4E_SHIFT); > + > + if (next_pdp > (start + length)) > + return length; > + > + return next_pdp - start; > +} > + > static inline uint32_t gen8_pte_index(uint64_t address) > { > return i915_pte_index(address, GEN8_PDE_SHIFT); > -- > 2.1.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx