Re: [PATCH 05/12] drm/i915/bdw: implement alloc/free for 4lvl

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry
<michel.thierry@xxxxxxxxx> wrote:
> From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx>
>
> The code for 4lvl works just as one would expect, and nicely it is able
> to call into the existing 3lvl page table code to handle all of the
> lower levels.
>
> PML4 has no special attributes, and there will always be a PML4.
> So simply initialize it at creation, and destroy it at the end.
>
> v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
> compiler happy. And define ret only in one place.
> Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.
>
> Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx>
> Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> (v2)
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  11 +-
>  2 files changed, 217 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1edcc17..edada33 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp)
>  static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp,
>                             struct drm_device *dev)
>  {
> -       __pdp_fini(pdp);
> -       if (USES_FULL_48BIT_PPGTT(dev))
> +       if (USES_FULL_48BIT_PPGTT(dev)) {
> +               __pdp_fini(pdp);

The call to __pdp_fini should be made in the 32-bit case as well.
The 'used_pdpes' bitmap and the 'page_directory' double pointer need to be
freed in the 32-bit case too (they are allocated inside __pdp_init, which is
called from gen8_ppgtt_init_common).

> +               i915_dma_unmap_single(pdp, dev);
> +               __free_page(pdp->page);
>                 kfree(pdp);
> +       }
>  }
>
>  static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
> @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
>         return 0;
>  }
>
> +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt,
> +                                              struct i915_pml4 *pml4)
> +{
> +       struct drm_device *dev = ppgtt->base.dev;
> +       struct i915_page_directory_pointer_entry *pdp;
> +       int ret;
> +
> +       BUG_ON(!USES_FULL_48BIT_PPGTT(dev));
> +
> +       pdp = kmalloc(sizeof(*pdp), GFP_KERNEL);
> +       if (!pdp)
> +               return ERR_PTR(-ENOMEM);
> +
> +       pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
> +       if (!pdp->page) {
> +               kfree(pdp);
> +               return ERR_PTR(-ENOMEM);
> +       }
> +
> +       ret = __pdp_init(pdp, dev);
> +       if (ret) {
> +               __free_page(pdp->page);
> +               kfree(pdp);
> +               return ERR_PTR(ret);
> +       }
> +
> +       i915_dma_map_px_single(pdp, dev);
> +
> +       return pdp;
> +}
> +
> +static void pml4_fini(struct i915_pml4 *pml4)
> +{
> +       struct i915_hw_ppgtt *ppgtt =
> +               container_of(pml4, struct i915_hw_ppgtt, pml4);
> +       i915_dma_unmap_single(pml4, ppgtt->base.dev);
> +       __free_page(pml4->page);
> +       /* HACK */
> +       pml4->page = NULL;
> +}
> +
> +static int pml4_init(struct i915_hw_ppgtt *ppgtt)
> +{
> +       struct i915_pml4 *pml4 = &ppgtt->pml4;
> +
> +       pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +       if (!pml4->page)
> +               return -ENOMEM;
> +
> +       i915_dma_map_px_single(pml4, ppgtt->base.dev);
> +
> +       return 0;
> +}
> +
>  /* Broadwell Page Directory Pointer Descriptors */
>  static int gen8_write_pdp(struct intel_engine_cs *ring,
>                           unsigned entry,
> @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d
>         }
>  }
>
> -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
> +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp,
> +                                       struct drm_device *dev)
>  {
> -       struct pci_dev *hwdev = ppgtt->base.dev->pdev;
> -       struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */
> +       struct pci_dev *hwdev = dev->pdev;
>         int i, j;
>
> -       for_each_set_bit(i, pdp->used_pdpes,
> -                       I915_PDPES_PER_PDP(ppgtt->base.dev)) {
> +       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
>                 struct i915_page_directory_entry *pd;
>
>                 if (WARN_ON(!pdp->page_directory[i]))
> @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
>         }
>  }
>
> -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt)
>  {
> +       struct pci_dev *hwdev = ppgtt->base.dev->pdev;
>         int i;
>
> -       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> -               for_each_set_bit(i, ppgtt->pdp.used_pdpes,
> -                                I915_PDPES_PER_PDP(ppgtt->base.dev)) {
> -                       if (WARN_ON(!ppgtt->pdp.page_directory[i]))
> -                               continue;
> +       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
> +               struct i915_page_directory_pointer_entry *pdp;
>
> -                       gen8_free_page_tables(ppgtt->pdp.page_directory[i],
> -                                             ppgtt->base.dev);
> -                       unmap_and_free_pd(ppgtt->pdp.page_directory[i],
> -                                         ppgtt->base.dev);
> -               }
> -               unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
> -       } else {
> -               BUG(); /* to be implemented later */
> +               if (WARN_ON(!ppgtt->pml4.pdps[i]))
> +                       continue;
> +
> +               pdp = ppgtt->pml4.pdps[i];
> +               if (!pdp->daddr)
> +                       pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE,
> +                                      PCI_DMA_BIDIRECTIONAL);
> +

For consistency and cleanup, the call to pci_unmap_page can be replaced
with i915_dma_unmap_single. The same can be done inside the
gen8_ppgtt_unmap_pages_3lvl function as well.
Also, the condition above looks inverted: "if (!pdp->daddr)" skips the unmap
precisely when there is a mapping to release — it should presumably be
"if (pdp->daddr)".

> +               gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i],
> +                                           ppgtt->base.dev);
>         }
>  }
>
> +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
> +{
> +       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev);
> +       else
> +               gen8_ppgtt_unmap_pages_4lvl(ppgtt);
> +}
> +
> +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp,
> +                                struct drm_device *dev)
> +{
> +       int i;
> +
> +       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
> +               if (WARN_ON(!pdp->page_directory[i]))
> +                       continue;
> +
> +               gen8_free_page_tables(pdp->page_directory[i], dev);
> +               unmap_and_free_pd(pdp->page_directory[i], dev);
> +       }
> +
> +       unmap_and_free_pdp(pdp, dev);
> +}
> +
> +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt)
> +{
> +       int i;
> +
> +       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
> +               if (WARN_ON(!ppgtt->pml4.pdps[i]))
> +                       continue;
> +
> +               gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev);
> +       }
> +
> +       pml4_fini(&ppgtt->pml4);
> +}
> +
> +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> +{
> +       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev);
> +       else
> +               gen8_ppgtt_free_4lvl(ppgtt);
> +}
> +
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>         struct i915_hw_ppgtt *ppgtt =

Is the call to 'gen8_ppgtt_unmap_pages' really needed from the
'gen8_ppgtt_cleanup' function, considering that gen8_ppgtt_free will also
perform the unmap operation for the pml4, pdp, pd and pt pages?

> @@ -1040,12 +1142,74 @@ err_out:
>         return ret;
>  }
>
> -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> -                                              struct i915_pml4 *pml4,
> -                                              uint64_t start,
> -                                              uint64_t length)
> +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> +                                   struct i915_pml4 *pml4,
> +                                   uint64_t start,
> +                                   uint64_t length)
>  {
> -       BUG(); /* to be implemented later */
> +       DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
> +       struct i915_hw_ppgtt *ppgtt =
> +               container_of(vm, struct i915_hw_ppgtt, base);
> +       struct i915_page_directory_pointer_entry *pdp;
> +       const uint64_t orig_start = start;
> +       const uint64_t orig_length = length;
> +       uint64_t temp, pml4e;
> +       int ret = 0;
> +
> +       /* Do the pml4 allocations first, so we don't need to track the newly
> +        * allocated tables below the pdp */
> +       bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
> +
> +       /* The page_directoryectory and pagetable allocations are done in the shared 3
> +        * and 4 level code. Just allocate the pdps.
> +        */
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +               if (!pdp) {
> +                       WARN_ON(test_bit(pml4e, pml4->used_pml4es));
> +                       pdp = alloc_pdp_single(ppgtt, pml4);
> +                       if (IS_ERR(pdp))
> +                               goto err_alloc;
> +
> +                       pml4->pdps[pml4e] = pdp;
> +                       set_bit(pml4e, new_pdps);
> +                       trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
> +                                                  pml4e << GEN8_PML4E_SHIFT,
> +                                                  GEN8_PML4E_SHIFT);
> +
> +               }
> +       }
> +
> +       WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
> +            "The allocation has spanned more than 512GB. "
> +            "It is highly likely this is incorrect.");
> +
> +       start = orig_start;
> +       length = orig_length;
> +
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +               BUG_ON(!pdp);
> +
> +               ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
> +               if (ret)
> +                       goto err_out;
> +       }
> +
> +       bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
> +                 GEN8_PML4ES_PER_PML4);
> +
> +       return 0;
> +
> +err_out:
> +       start = orig_start;
> +       length = orig_length;
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e)
> +               gen8_ppgtt_free_3lvl(pdp, vm->dev);
> +
> +err_alloc:
> +       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> +               unmap_and_free_pdp(pdp, vm->dev);
> +

If gen8_alloc_va_range_3lvl returns an error, there can be two calls to
unmap_and_free_pdp for the same pdp value: gen8_ppgtt_free_3lvl (called from
err_out) will also invoke unmap_and_free_pdp for the newly allocated pdp,
and then the err_alloc loop frees it again.
gen8_alloc_va_range_3lvl already seems to handle errors internally, i.e. it
de-allocates the newly allocated page-directory and page-table pages if an
allocation failure occurs somewhere inside it.
So gen8_alloc_va_range_4lvl can likewise just de-allocate the newly
allocated pdps ('new_pdps') and nothing more.

> +       return ret;
>  }
>
>  static int gen8_alloc_va_range(struct i915_address_space *vm,
> @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>         struct i915_hw_ppgtt *ppgtt =
>                 container_of(vm, struct i915_hw_ppgtt, base);
>
> -       if (!USES_FULL_48BIT_PPGTT(vm->dev))
> -               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
> -       else
> +       if (USES_FULL_48BIT_PPGTT(vm->dev))
>                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
> +       else
> +               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>  }
>
>  static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
>  {
>         unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> -       unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
> +       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               pml4_fini(&ppgtt->pml4);
> +       else
> +               unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
>  }
>
>  /**
> @@ -1086,14 +1253,21 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>
>         ppgtt->switch_mm = gen8_mm_switch;
>
> -       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +               int ret = pml4_init(ppgtt);
> +               if (ret) {
> +                       unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> +                       return ret;
> +               }
> +       } else {
>                 int ret = __pdp_init(&ppgtt->pdp, false);
>                 if (ret) {
>                         unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
>                         return ret;
>                 }
> -       } else
> -               return -EPERM; /* Not yet implemented */
> +
> +               trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
> +       }
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 144858e..1477f54 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>   */
>  #define GEN8_PML4ES_PER_PML4           512
>  #define GEN8_PML4E_SHIFT               39
> +#define GEN8_PML4E_MASK                        (GEN8_PML4ES_PER_PML4 - 1)
>  #define GEN8_PDPE_SHIFT                        30
>  /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
>   * tables */
> @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>              temp = min(temp, length),                                  \
>              start += temp, length -= temp)
>
> +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
> +       for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter];  \
> +            length > 0 && iter < GEN8_PML4ES_PER_PML4;                 \
> +            pdp = (pml4)->pdps[++iter],                                \
> +            temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,   \
> +            temp = min(temp, length),                                  \
> +            start += temp, length -= temp)
> +
>  #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
>         gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
>
> @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
>
>  static inline uint32_t gen8_pml4e_index(uint64_t address)
>  {
> -       BUG(); /* For 64B */
> +       return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
>  }
>
>  static inline size_t gen8_pte_count(uint64_t addr, uint64_t length)
> --
> 2.1.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux