All of the page tracking structures contain a bitmap holding currently used entries. It's redundant, since we can just inspect the pointer. The only actual place where we're taking advantage of bitmaps is during ppgtt cleanup - since we're able to reduce the number of iterations. Still, we can improve that in the following commits, and iterate over the full range here. Let's replace the bitmaps with a simple counter. Cc: Arkadiusz Hiler <arkadiusz.hiler@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Michel Thierry <michel.thierry@xxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 138 +++++++++++++----------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 10 +-- 2 files changed, 54 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a7d6f78..50d4861 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -426,38 +426,26 @@ static void cleanup_scratch_page(struct drm_i915_private *dev_priv, static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; int ret = -ENOMEM; pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) return ERR_PTR(-ENOMEM); - pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), - GFP_KERNEL); - - if (!pt->used_ptes) - goto fail_bitmap; - ret = setup_px(dev_priv, pt); - if (ret) - goto fail_page_m; + if (ret) { + kfree(pt); + return ERR_PTR(ret); + } return pt; -fail_page_m: - kfree(pt->used_ptes); -fail_bitmap: - kfree(pt); - - return ERR_PTR(ret); } static void free_pt(struct drm_i915_private *dev_priv, struct i915_page_table *pt) { cleanup_px(dev_priv, pt); - kfree(pt->used_ptes); kfree(pt); } @@ -494,23 +482,13 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) if (!pd) return ERR_PTR(-ENOMEM); - pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(*pd->used_pdes), GFP_KERNEL); - if (!pd->used_pdes) - goto fail_bitmap; - ret = setup_px(dev_priv, pd); - if (ret) - goto fail_page_m; + if (ret) { + kfree(pd); + return ERR_PTR(ret); + } return pd; - -fail_page_m: - kfree(pd->used_pdes); -fail_bitmap: - kfree(pd); - - return ERR_PTR(ret); } static void free_pd(struct drm_i915_private *dev_priv, @@ -518,7 +496,6 @@ static void free_pd(struct drm_i915_private *dev_priv, { if (px_page(pd)) { cleanup_px(dev_priv, pd); - kfree(pd->used_pdes); kfree(pd); } } @@ -538,28 +515,16 @@ static int __pdp_init(struct drm_i915_private *dev_priv, { size_t pdpes = I915_PDPES_PER_PDP(dev_priv); - pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), - sizeof(unsigned long), - GFP_KERNEL); - if (!pdp->used_pdpes) - return -ENOMEM; - pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), GFP_KERNEL); - if (!pdp->page_directory) { - kfree(pdp->used_pdpes); - /* the PDP might be the statically allocated top level. Keep it - * as clean as possible */ - pdp->used_pdpes = NULL; + if (!pdp->page_directory) return -ENOMEM; - } return 0; } static void __pdp_fini(struct i915_page_directory_pointer *pdp) { - kfree(pdp->used_pdpes); kfree(pdp->page_directory); pdp->page_directory = NULL; } @@ -733,9 +698,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, GEM_BUG_ON(pte_end > GEN8_PTES); - bitmap_clear(pt->used_ptes, pte, num_entries); + pt->num_ptes -= num_entries; - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) + if (pt->num_ptes == 0) return true; pt_vaddr = kmap_px(pt); @@ -768,15 +733,16 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, break; if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { - __clear_bit(pde, pd->used_pdes); + pd->num_pdes--; pde_vaddr = kmap_px(pd); pde_vaddr[pde] = scratch_pde; kunmap_px(ppgtt, pde_vaddr); free_pt(vm->i915, pt); + pd->page_table[pde] = NULL; } } - if (bitmap_empty(pd->used_pdes, I915_PDES)) + if (pd->num_pdes == 0) return true; return false; @@ -802,19 +768,20 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, break; if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { - __clear_bit(pdpe, pdp->used_pdpes); + pdp->num_pdpes--; if (USES_FULL_48BIT_PPGTT(dev_priv)) { pdpe_vaddr = kmap_px(pdp); pdpe_vaddr[pdpe] = scratch_pdpe; kunmap_px(ppgtt, pdpe_vaddr); } free_pd(vm->i915, pd); + pdp->page_directory[pdpe] = NULL; } } mark_tlbs_dirty(ppgtt); - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) + if (pdp->num_pdpes == 0) return true; return false; @@ -843,11 +810,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, break; if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { - __clear_bit(pml4e, pml4->used_pml4es); + pml4->num_pml4es--; pml4e_vaddr = kmap_px(pml4); pml4e_vaddr[pml4e] = scratch_pml4e; kunmap_px(ppgtt, pml4e_vaddr); free_pdp(vm->i915, pdp); + pml4->pdps[pml4e] = NULL; } } } @@ -938,12 +906,11 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv, if (!px_page(pd)) return; - for_each_set_bit(i, pd->used_pdes, I915_PDES) { - if (WARN_ON(!pd->page_table[i])) - continue; - - free_pt(dev_priv, pd->page_table[i]); - pd->page_table[i] = NULL; + for (i = 0; i < I915_PDES; i++) { + if (pd->page_table[i]) { + free_pt(dev_priv, pd->page_table[i]); + pd->page_table[i] = NULL; + } } } @@ -1040,12 +1007,11 @@ static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, { int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { - if (WARN_ON(!pdp->page_directory[i])) - continue; - - gen8_free_page_tables(dev_priv, pdp->page_directory[i]); - free_pd(dev_priv, pdp->page_directory[i]); + for (i = 0; i < I915_PDPES_PER_PDP(dev_priv); i++) { + if (pdp->page_directory[i]) { + gen8_free_page_tables(dev_priv, pdp->page_directory[i]); + pdp->page_directory[i] = NULL; + } } free_pdp(dev_priv, pdp); @@ -1056,11 +1022,11 @@ static void gen8_ppgtt_cleanup_4lvl(struct drm_i915_private *dev_priv, { int i; - for_each_set_bit(i, pml4->used_pml4es, GEN8_PML4ES_PER_PML4) { - if (WARN_ON(!pml4->pdps[i])) - continue; - - gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]); + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + if (pml4->pdps[i]) { + gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]); + pml4->pdps[i] = NULL; + } } cleanup_px(dev_priv, pml4); @@ -1109,7 +1075,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, const uint64_t start_save = start; gen8_for_each_pde(pt, pd, start, length, pde) { - if (test_bit(pde, pd->used_pdes)) + if (pd->page_table[pde]) continue; pt = alloc_pt(dev_priv); @@ -1121,7 +1087,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, gen8_initialize_pt(vm, pt); pd->page_table[pde] = pt; - __set_bit(pde, pd->used_pdes); + pd->num_pdes++; trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); } @@ -1160,7 +1126,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, const uint64_t start_save = start; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (test_bit(pdpe, pdp->used_pdpes)) + if (pdp->page_directory[pdpe]) continue; pd = alloc_pd(dev_priv); @@ -1172,7 +1138,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); pdp->page_directory[pdpe] = pd; - __set_bit(pdpe, pdp->used_pdpes); + pdp->num_pdpes++; trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); } @@ -1205,7 +1171,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, const uint64_t start_save = start; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (test_bit(pml4e, pml4->used_pml4es)) + if (pml4->pdps[pml4e]) continue; pdp = alloc_pdp(dev_priv); @@ -1217,7 +1183,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, gen8_initialize_pdp(vm, pdp); pml4->pdps[pml4e] = pdp; - __set_bit(pml4e, pml4->used_pml4es); + pml4->num_pml4es++; trace_i915_page_directory_pointer_entry_alloc(vm, pml4e, start, @@ -1286,9 +1252,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, WARN_ON(!gen8_pte_count(pd_start, pd_len)); /* Set our used ptes within the page table */ - bitmap_set(pt->used_ptes, - gen8_pte_index(pd_start), - gen8_pte_count(pd_start, pd_len)); + pt->num_ptes += gen8_pte_count(pd_start, pd_len); /* Map the PDE to the page table */ page_directory[pde] = gen8_pde_encode(px_dma(pt), @@ -1296,7 +1260,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, gen8_pte_index(start), gen8_pte_count(start, length), - bitmap_weight(pt->used_ptes, GEN8_PTES)); + pt->num_ptes); /* NB: We haven't yet mapped ptes to pages. At this * point we're still relying on insert_entries() */ @@ -1373,7 +1337,7 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, uint64_t pd_start = start; uint32_t pde; - if (!test_bit(pdpe, pdp->used_pdpes)) + if (!pdp->page_directory[pdpe]) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); @@ -1381,7 +1345,7 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, uint32_t pte; gen8_pte_t *pt_vaddr; - if (!test_bit(pde, pd->used_pdes)) + if (!pd->page_table[pde]) continue; pt_vaddr = kmap_px(pt); @@ -1432,7 +1396,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) + if (!pml4->pdps[pml4e]) continue; seq_printf(m, " PML4E #%llu\n", pml4e); @@ -1452,9 +1416,6 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) */ ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, start, length); - if (!ret) - bitmap_set(ppgtt->pdp.used_pdpes, gen8_pdpe_index(start), - i915_pte_count(start, length, GEN8_PDPE_SHIFT)); return ret; } @@ -1848,12 +1809,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, */ gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { if (pt != vm->scratch_pt) { - WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); + WARN_ON(pt->num_ptes == 0); continue; } /* We've already allocated a page table */ - WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); + WARN_ON(pt->num_ptes != 0); pt = alloc_pt(dev_priv); if (IS_ERR(pt)) { @@ -1866,8 +1827,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); gen6_initialize_pt(vm, pt); - bitmap_set(pt->used_ptes, gen6_pte_index(start), - gen6_pte_count(start, length)); + pt->num_ptes += gen6_pte_count(start, length); ppgtt->pd.page_table[pde] = pt; gen6_write_pde(&ppgtt->pd, pde, pt); @@ -1875,7 +1835,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, trace_i915_page_table_entry_map(vm, pde, pt, gen6_pte_index(start), gen6_pte_count(start, length), - bitmap_weight(pt->used_ptes, GEN6_PTES)); + pt->num_ptes); } /* Make sure write is complete before other code can use this page diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 8965bbb..1fc65ac 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -193,27 +193,27 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - unsigned long *used_ptes; + unsigned int num_ptes; }; struct i915_page_directory { struct i915_page_dma base; - unsigned long *used_pdes; + unsigned int num_pdes; struct i915_page_table *page_table[I915_PDES]; /* PDEs */ }; struct i915_page_directory_pointer { struct i915_page_dma base; - unsigned long *used_pdpes; + unsigned int num_pdpes; struct i915_page_directory **page_directory; }; struct i915_pml4 { struct i915_page_dma base; - DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); + unsigned int num_pml4es; struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; @@ -477,7 +477,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) { - return test_bit(n, ppgtt->pdp.used_pdpes) ? + return ppgtt->pdp.page_directory[n] ? px_dma(ppgtt->pdp.page_directory[n]) : px_dma(ppgtt->base.scratch_pd); } -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx