Re: [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2 February 2017 at 15:02, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> We only operate on known extents (both for alloc/clear) and so we can use
> both the knowledge of the bind/unbind range along with the knowledge of
> the existing pagetable to avoid having to allocate temporary and
> auxiliary bitmaps.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 275 +++++++++++-------------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   3 +-
>  2 files changed, 84 insertions(+), 194 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 15e95904931f..99319461f86c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -526,24 +526,13 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>  static int __pdp_init(struct i915_address_space *vm,
>                       struct i915_page_directory_pointer *pdp)
>  {
> -       size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
> -       int i;
> -
> -       pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
> -                                 sizeof(unsigned long),
> -                                 GFP_KERNEL);
> -       if (!pdp->used_pdpes)
> -               return -ENOMEM;
> +       const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915);
> +       unsigned int i;
>
>         pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
> -                                           GFP_KERNEL);
> -       if (!pdp->page_directory) {
> -               kfree(pdp->used_pdpes);
> -               /* the PDP might be the statically allocated top level. Keep it
> -                * as clean as possible */
> -               pdp->used_pdpes = NULL;
> +                                           GFP_KERNEL | __GFP_NOWARN);
> +       if (unlikely(!pdp->page_directory))
>                 return -ENOMEM;
> -       }
>
>         for (i = 0; i < pdpes; i++)
>                 pdp->page_directory[i] = vm->scratch_pd;
> @@ -553,7 +542,6 @@ static int __pdp_init(struct i915_address_space *vm,
>
>  static void __pdp_fini(struct i915_page_directory_pointer *pdp)
>  {
> -       kfree(pdp->used_pdpes);
>         kfree(pdp->page_directory);
>         pdp->page_directory = NULL;
>  }
> @@ -611,23 +599,12 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>  static void gen8_initialize_pml4(struct i915_address_space *vm,
>                                  struct i915_pml4 *pml4)
>  {
> -       gen8_ppgtt_pml4e_t scratch_pml4e;
> -
> -       scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
> -                                         I915_CACHE_LLC);
> -
> -       fill_px(vm, pml4, scratch_pml4e);
> -}
> -
> -static void
> -gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
> -                    struct i915_page_directory_pointer *pdp,
> -                    int index)
> -{
> -       gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
> +       unsigned int i;
>
> -       pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> -       kunmap_atomic(pagemap);
> +       fill_px(vm, pml4,
> +               gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
> +       for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
> +               pml4->pdps[i] = vm->scratch_pdp;
>  }
>
>  /* Broadwell Page Directory Pointer Descriptors */
> @@ -781,15 +758,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>                         continue;
>
>                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
> -               __clear_bit(pdpe, pdp->used_pdpes);
> +               pdp->used_pdpes--;
>
>                 free_pd(vm, pd);
>         }
>
> -       if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
> -               return true;
> -
> -       return false;
> +       return !pdp->used_pdpes;
>  }
>
>  static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
> @@ -798,6 +772,19 @@ static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
>         gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
>  }
>
> +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
> +                                struct i915_page_directory_pointer *pdp,
> +                                unsigned int pml4e)
> +{
> +       gen8_ppgtt_pml4e_t *vaddr;
> +
> +       pml4->pdps[pml4e] = pdp;
> +
> +       vaddr = kmap_atomic_px(pml4);
> +       vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> +       kunmap_atomic(vaddr);
> +}
> +
>  /* Removes entries from a single pml4.
>   * This is the top-level structure in 4-level page tables used on gen8+.
>   * Empty entries are always scratch pml4e.
> @@ -808,19 +795,18 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>         struct i915_pml4 *pml4 = &ppgtt->pml4;
>         struct i915_page_directory_pointer *pdp;
> -       uint64_t pml4e;
> +       unsigned int pml4e;
>
>         GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
>
>         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -               if (WARN_ON(!pml4->pdps[pml4e]))
> -                       break;
> +               if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
> +                       continue;
>
> -               if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
> -                       __clear_bit(pml4e, pml4->used_pml4es);
> -                       gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
> -                       free_pdp(vm, pdp);
> -               }
> +               gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
> +               __clear_bit(pml4e, pml4->used_pml4es);
> +
> +               free_pdp(vm, pdp);
>         }
>  }
>
> @@ -1017,7 +1003,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  {
>         int i;
>
> -       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
> +       for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) {
>                 if (pdp->page_directory[i] == vm->scratch_pd)
>                         continue;
>
> @@ -1088,65 +1074,6 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  }
>
>  /**
> - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
> - * @vm:        Master vm structure.
> - * @pdp:       Page directory pointer for this address range.
> - * @start:     Starting virtual address to begin allocations.
> - * @length:    Size of the allocations.
> - * @new_pds:   Bitmap set by function with new allocations. Likely used by the
> - *             caller to free on error.
> - *
> - * Allocate the required number of page directories starting at the pde index of
> - * @start, and ending at the pde index @start + @length. This function will skip
> - * over already allocated page directories within the range, and only allocate
> - * new ones, setting the appropriate pointer within the pdp as well as the
> - * correct position in the bitmap @new_pds.
> - *
> - * The function will only allocate the pages within the range for a give page
> - * directory pointer. In other words, if @start + @length straddles a virtually
> - * addressed PDP boundary (512GB for 4k pages), there will be more allocations
> - * required by the caller, This is not currently possible, and the BUG in the
> - * code will prevent it.
> - *
> - * Return: 0 if success; negative error code otherwise.
> - */
> -static int
> -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
> -                                 struct i915_page_directory_pointer *pdp,
> -                                 uint64_t start,
> -                                 uint64_t length,
> -                                 unsigned long *new_pds)
> -{
> -       struct i915_page_directory *pd;
> -       uint32_t pdpe;
> -       uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> -
> -       WARN_ON(!bitmap_empty(new_pds, pdpes));
> -
> -       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -               if (test_bit(pdpe, pdp->used_pdpes))
> -                       continue;
> -
> -               pd = alloc_pd(vm);
> -               if (IS_ERR(pd))
> -                       goto unwind_out;
> -
> -               gen8_initialize_pd(vm, pd);
> -               pdp->page_directory[pdpe] = pd;
> -               __set_bit(pdpe, new_pds);
> -               trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
> -       }
> -
> -       return 0;
> -
> -unwind_out:
> -       for_each_set_bit(pdpe, new_pds, pdpes)
> -               free_pd(vm, pdp->page_directory[pdpe]);
> -
> -       return -ENOMEM;
> -}
> -
> -/**
>   * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
>   * @vm:        Master vm structure.
>   * @pml4:      Page map level 4 for this address range.
> @@ -1166,23 +1093,19 @@ static int
>  gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>                                   struct i915_pml4 *pml4,
>                                   uint64_t start,
> -                                 uint64_t length,
> -                                 unsigned long *new_pdps)
> +                                 uint64_t length)
>  {
>         struct i915_page_directory_pointer *pdp;
>         uint32_t pml4e;
>
> -       WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
> -
>         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>                 if (!test_bit(pml4e, pml4->used_pml4es)) {
>                         pdp = alloc_pdp(vm);
>                         if (IS_ERR(pdp))
> -                               goto unwind_out;
> +                               return PTR_ERR(pdp);
>
>                         gen8_initialize_pdp(vm, pdp);
>                         pml4->pdps[pml4e] = pdp;
> -                       __set_bit(pml4e, new_pdps);
>                         trace_i915_page_directory_pointer_entry_alloc(vm,
>                                                                       pml4e,
>                                                                       start,
> @@ -1191,34 +1114,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>         }
>
>         return 0;
> -
> -unwind_out:
> -       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -               free_pdp(vm, pml4->pdps[pml4e]);
> -
> -       return -ENOMEM;
> -}
> -
> -static void
> -free_gen8_temp_bitmaps(unsigned long *new_pds)
> -{
> -       kfree(new_pds);
> -}
> -
> -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
> - * of these are based on the number of PDPEs in the system.
> - */
> -static int __must_check
> -alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
> -{
> -       unsigned long *pds;
> -
> -       pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
> -       if (!pds)
> -               return -ENOMEM;
> -
> -       *new_pds = pds;
> -       return 0;
>  }
>
>  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
> @@ -1227,47 +1122,37 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>                                     uint64_t length)
>  {
>         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -       unsigned long *new_page_dirs;
>         struct i915_page_directory *pd;
> -       uint32_t pdpe;
> -       uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> +       u64 from = start;
> +       unsigned int pdpe;
>         int ret;
>
> -       ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
> -       if (ret)
> -               return ret;
> -
> -       /* Do the allocations first so we can easily bail out */
> -       ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
> -                                               new_page_dirs);
> -       if (ret) {
> -               free_gen8_temp_bitmaps(new_page_dirs);
> -               return ret;
> -       }
> -
> -       /* For every page directory referenced, allocate page tables */
>         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -               ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> -               if (ret)
> -                       goto err_out;
> +               if (pd == vm->scratch_pd) {
> +                       pd = alloc_pd(vm);
> +                       if (IS_ERR(pd))
> +                               goto unwind;
>
> -               if (test_and_set_bit(pdpe, pdp->used_pdpes))
> +                       gen8_initialize_pd(vm, pd);
>                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
> +                       pdp->used_pdpes++;
> +               }
> +
> +               ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> +               if (unlikely(ret)) {
> +                       gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
> +                       pdp->used_pdpes--;
> +                       free_pd(vm, pd);
> +                       goto unwind;
> +               }
>         }
>
> -       /* Allocations have completed successfully, so set the bitmaps, and do
> -        * the mappings. */
> -       free_gen8_temp_bitmaps(new_page_dirs);
>         mark_tlbs_dirty(ppgtt);
>         return 0;
>
> -err_out:
> -       for_each_set_bit(pdpe, new_page_dirs, pdpes)
> -               free_pd(vm, pdp->page_directory[pdpe]);
> -
> -       free_gen8_temp_bitmaps(new_page_dirs);
> -       mark_tlbs_dirty(ppgtt);
> -       return ret;
> +unwind:
> +       gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> +       return -ENOMEM;
>  }
>
>  static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> @@ -1287,8 +1172,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>         /* The pagedirectory and pagetable allocations are done in the shared 3
>          * and 4 level code. Just allocate the pdps.
>          */
> -       ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
> -                                               new_pdps);
> +       ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
>         if (ret)
>                 return ret;
>
> @@ -1340,7 +1224,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
>                 uint64_t pd_start = start;
>                 uint32_t pde;
>
> -               if (!test_bit(pdpe, pdp->used_pdpes))
> +               if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
>                         continue;
>
>                 seq_printf(m, "\tPDPE #%d\n", pdpe);
> @@ -1407,29 +1291,34 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>
>  static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
>  {
s/gen8_preallocate_top_level_pdps/gen8_preallocate_top_level_pdp/ ?

Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux