On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry <michel.thierry@xxxxxxxxx> wrote: > From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx> > > The code for 4lvl works just as one would expect, and nicely it is able > to call into the existing 3lvl page table code to handle all of the > lower levels. > > PML4 has no special attributes, and there will always be a PML4. > So simply initialize it at creation, and destroy it at the end. > > v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the > compiler happy. And define ret only in one place. > Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl. > > Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> > Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> (v2) > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_gem_gtt.h | 11 +- > 2 files changed, 217 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 1edcc17..edada33 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp) > static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp, > struct drm_device *dev) > { > - __pdp_fini(pdp); > - if (USES_FULL_48BIT_PPGTT(dev)) > + if (USES_FULL_48BIT_PPGTT(dev)) { > + __pdp_fini(pdp); Call to __pdp_fini should be made for the 32 bit also. The 'used_pdpes' bitmap & 'page_directory' double pointer needs to be freed in 32 bit case also (allocated inside __pdp_init, called from gen8_ppgtt_init_common). 
> + i915_dma_unmap_single(pdp, dev); > + __free_page(pdp->page); > kfree(pdp); > + } > } > > static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, > @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, > return 0; > } > > +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt, > + struct i915_pml4 *pml4) > +{ > + struct drm_device *dev = ppgtt->base.dev; > + struct i915_page_directory_pointer_entry *pdp; > + int ret; > + > + BUG_ON(!USES_FULL_48BIT_PPGTT(dev)); > + > + pdp = kmalloc(sizeof(*pdp), GFP_KERNEL); > + if (!pdp) > + return ERR_PTR(-ENOMEM); > + > + pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); > + if (!pdp->page) { > + kfree(pdp); > + return ERR_PTR(-ENOMEM); > + } > + > + ret = __pdp_init(pdp, dev); > + if (ret) { > + __free_page(pdp->page); > + kfree(pdp); > + return ERR_PTR(ret); > + } > + > + i915_dma_map_px_single(pdp, dev); > + > + return pdp; > +} > + > +static void pml4_fini(struct i915_pml4 *pml4) > +{ > + struct i915_hw_ppgtt *ppgtt = > + container_of(pml4, struct i915_hw_ppgtt, pml4); > + i915_dma_unmap_single(pml4, ppgtt->base.dev); > + __free_page(pml4->page); > + /* HACK */ > + pml4->page = NULL; > +} > + > +static int pml4_init(struct i915_hw_ppgtt *ppgtt) > +{ > + struct i915_pml4 *pml4 = &ppgtt->pml4; > + > + pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!pml4->page) > + return -ENOMEM; > + > + i915_dma_map_px_single(pml4, ppgtt->base.dev); > + > + return 0; > +} > + > /* Broadwell Page Directory Pointer Descriptors */ > static int gen8_write_pdp(struct intel_engine_cs *ring, > unsigned entry, > @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d > } > } > > -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp, > + struct drm_device *dev) > { > - struct pci_dev *hwdev = 
ppgtt->base.dev->pdev; > - struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */ > + struct pci_dev *hwdev = dev->pdev; > int i, j; > > - for_each_set_bit(i, pdp->used_pdpes, > - I915_PDPES_PER_PDP(ppgtt->base.dev)) { > + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { > struct i915_page_directory_entry *pd; > > if (WARN_ON(!pdp->page_directory[i])) > @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > } > } > > -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt) > { > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > int i; > > - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > - for_each_set_bit(i, ppgtt->pdp.used_pdpes, > - I915_PDPES_PER_PDP(ppgtt->base.dev)) { > - if (WARN_ON(!ppgtt->pdp.page_directory[i])) > - continue; > + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { > + struct i915_page_directory_pointer_entry *pdp; > > - gen8_free_page_tables(ppgtt->pdp.page_directory[i], > - ppgtt->base.dev); > - unmap_and_free_pd(ppgtt->pdp.page_directory[i], > - ppgtt->base.dev); > - } > - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > - } else { > - BUG(); /* to be implemented later */ > + if (WARN_ON(!ppgtt->pml4.pdps[i])) > + continue; > + > + pdp = ppgtt->pml4.pdps[i]; > + if (!pdp->daddr) > + pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + For consistency & cleanup, the call to pci_unmap_page can be replaced with i915_dma_unmap_single. Same can be done inside the gen8_ppgtt_unmap_pages_3lvl function also. 
> + gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i], > + ppgtt->base.dev); > } } > > +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > +{ > + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev); > + else > + gen8_ppgtt_unmap_pages_4lvl(ppgtt); > +} > + > +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp, > + struct drm_device *dev) > +{ > + int i; > + > + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { > + if (WARN_ON(!pdp->page_directory[i])) > + continue; > + > + gen8_free_page_tables(pdp->page_directory[i], dev); > + unmap_and_free_pd(pdp->page_directory[i], dev); > + } > + > + unmap_and_free_pdp(pdp, dev); > +} > + > +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt) > +{ > + int i; > + > + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { > + if (WARN_ON(!ppgtt->pml4.pdps[i])) > + continue; > + > + gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev); > + } > + > + pml4_fini(&ppgtt->pml4); > +} > + > +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > +{ > + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev); > + else > + gen8_ppgtt_free_4lvl(ppgtt); > +} > + > static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > { > struct i915_hw_ppgtt *ppgtt = Is the call to 'gen8_ppgtt_unmap_pages' really needed from the 'gen8_ppgtt_cleanup' function, considering that gen8_ppgtt_free will also perform the unmap operation for the pml4, pdp, pd & pt pages? 
> @@ -1040,12 +1142,74 @@ err_out: > return ret; > } > > -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm, > - struct i915_pml4 *pml4, > - uint64_t start, > - uint64_t length) > +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, > + struct i915_pml4 *pml4, > + uint64_t start, > + uint64_t length) > { > - BUG(); /* to be implemented later */ > + DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); > + struct i915_hw_ppgtt *ppgtt = > + container_of(vm, struct i915_hw_ppgtt, base); > + struct i915_page_directory_pointer_entry *pdp; > + const uint64_t orig_start = start; > + const uint64_t orig_length = length; > + uint64_t temp, pml4e; > + int ret = 0; > + > + /* Do the pml4 allocations first, so we don't need to track the newly > + * allocated tables below the pdp */ > + bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); > + > + /* The page_directoryectory and pagetable allocations are done in the shared 3 > + * and 4 level code. Just allocate the pdps. > + */ > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { > + if (!pdp) { > + WARN_ON(test_bit(pml4e, pml4->used_pml4es)); > + pdp = alloc_pdp_single(ppgtt, pml4); > + if (IS_ERR(pdp)) > + goto err_alloc; > + > + pml4->pdps[pml4e] = pdp; > + set_bit(pml4e, new_pdps); > + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e, > + pml4e << GEN8_PML4E_SHIFT, > + GEN8_PML4E_SHIFT); > + > + } > + } > + > + WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, > + "The allocation has spanned more than 512GB. 
" > + "It is highly likely this is incorrect."); > + > + start = orig_start; > + length = orig_length; > + > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { > + BUG_ON(!pdp); > + > + ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); > + if (ret) > + goto err_out; > + } > + > + bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, > + GEN8_PML4ES_PER_PML4); > + > + return 0; > + > +err_out: > + start = orig_start; > + length = orig_length; > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) > + gen8_ppgtt_free_3lvl(pdp, vm->dev); > + > +err_alloc: > + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) > + unmap_and_free_pdp(pdp, vm->dev); > + If __gen8_alloc_vma_range_3lvl returns an error, then there can be 2 calls to unmap_and_free_pdp for the same pdp value. The gen8_ppgtt_free_3lvl will also call the unmap_and_free_pdp for the newly allocated pdp. Already __gen8_alloc_vma_range_3lvl seems to be error handling internally, i.e. it is de-allocating the newly allocated pages for page directory & page tables, if there is an allocation failure somewhere. So similarly __gen8_alloc_vma_range_4lvl function can just do the de-allocation for the newly allocated pdps ('new_pdps'). 
> + return ret; > } > > static int gen8_alloc_va_range(struct i915_address_space *vm, > @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > > - if (!USES_FULL_48BIT_PPGTT(vm->dev)) > - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); > - else > + if (USES_FULL_48BIT_PPGTT(vm->dev)) > return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); > + else > + return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); > } > > static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt) > { > unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + pml4_fini(&ppgtt->pml4); > + else > + unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > } > > /** > @@ -1086,14 +1253,21 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > ppgtt->switch_mm = gen8_mm_switch; > > - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > + int ret = pml4_init(ppgtt); > + if (ret) { > + unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > + return ret; > + } > + } else { > int ret = __pdp_init(&ppgtt->pdp, false); > if (ret) { > unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > return ret; > } > - } else > - return -EPERM; /* Not yet implemented */ > + > + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT); > + } > > return 0; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 144858e..1477f54 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > */ > #define GEN8_PML4ES_PER_PML4 512 > #define GEN8_PML4E_SHIFT 39 > +#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) > #define 
GEN8_PDPE_SHIFT 30 > /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page > * tables */ > @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr) > temp = min(temp, length), \ > start += temp, length -= temp) > > +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ > + for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter]; \ > + length > 0 && iter < GEN8_PML4ES_PER_PML4; \ > + pdp = (pml4)->pdps[++iter], \ > + temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ > + temp = min(temp, length), \ > + start += temp, length -= temp) > + > #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ > gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev)) > > @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address) > > static inline uint32_t gen8_pml4e_index(uint64_t address) > { > - BUG(); /* For 64B */ > + return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; > } > > static inline size_t gen8_pte_count(uint64_t addr, uint64_t length) > -- > 2.1.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx