There is no need to preallocate the aliasing PPGTT. The code is now
properly plumbed to treat this address space like any other.

v2: Updated for CHV. Note that CHV does not support a 64b address space.

Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 281 ++++++++++++++++++++----------------
 1 file changed, 153 insertions(+), 128 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d67d803..959054c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -554,14 +554,14 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return gen8_write_pdp(ring, 0, ppgtt->pml4.daddr, synchronous);
 }
 
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start,
-				   uint64_t length,
-				   bool use_scratch)
+/* Helper function to clear a range of PTEs. The range may span multiple page
+ * tables. */
+static void gen8_ppgtt_clear_pte_range(struct i915_hw_ppgtt *ppgtt,
+				       struct i915_pagedirpo *pdp,
+				       uint64_t start,
+				       uint64_t length,
+				       bool scratch)
 {
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_pagedirpo *pdp = &ppgtt->pdp; /* FIXME: 48b */
 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
 	unsigned pdpe = gen8_pdpe_index(start);
 	unsigned pde = gen8_pde_index(start);
@@ -570,7 +570,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 	unsigned last_pte, i;
 
 	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
-				      I915_CACHE_LLC, use_scratch);
+				      I915_CACHE_LLC, scratch);
 
 	while (num_entries) {
 		struct i915_pagedir *pd = pdp->pagedirs[pdpe];
@@ -600,23 +600,21 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct sg_table *pages,
-				      uint64_t start,
-				      enum i915_cache_level cache_level)
+static void gen8_ppgtt_insert_pte_entries(struct i915_pagedirpo *pdp,
+					  struct sg_page_iter *sg_iter,
+					  uint64_t start,
+					  size_t pages,
+					  enum i915_cache_level cache_level,
+					  bool flush_pt)
 {
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_pagedirpo *pdp = &ppgtt->pdp; /* FIXME: 48b */
 	gen8_gtt_pte_t *pt_vaddr;
 	unsigned pdpe = gen8_pdpe_index(start);
 	unsigned pde = gen8_pde_index(start);
 	unsigned pte = gen8_pte_index(start);
-	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
-	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+	while (pages-- && __sg_page_iter_next(sg_iter)) {
 		if (pt_vaddr == NULL) {
 			struct i915_pagedir *pd = pdp->pagedirs[pdpe];
 			struct i915_pagetab *pt = pd->page_tables[pde];
@@ -625,10 +623,10 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 
 		pt_vaddr[pte] =
-			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
+			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
 					cache_level, true);
 		if (++pte == GEN8_PTES_PER_PT) {
-			if (!HAS_LLC(ppgtt->base.dev))
+			if (flush_pt)
 				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
@@ -640,7 +638,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 	}
 	if (pt_vaddr) {
-		if (!HAS_LLC(ppgtt->base.dev))
+		if (flush_pt)
 			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
 		kunmap_atomic(pt_vaddr);
 	}
@@ -730,10 +728,14 @@ static void gen8_map_page_directory_pointer(struct i915_pml4 *pml4,
 	kunmap_atomic(pagemap);
 }
 
-static void gen8_unmap_vma_3lvl(struct i915_address_space *vm,
-				struct i915_pagedirpo *pdp,
-				uint64_t start, uint64_t length)
+/* Returns 1 if a PDP has been freed and the caller can potentially
+ * clean up. */
+static int gen8_unmap_vma_3lvl(struct i915_address_space *vm,
+			       struct i915_pagedirpo *pdp,
+			       uint64_t start, uint64_t length)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	struct i915_pagetab *pt;
@@ -742,7 +744,7 @@ static void gen8_unmap_vma_3lvl(struct i915_address_space *vm,
 
 	if (!pdp || !pdp->pagedirs) {
 		/* If pagedirs are already free, there is nothing to do.*/
-		return;
+		return 0;
 	}
 
 	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
@@ -784,8 +786,6 @@ static void gen8_unmap_vma_3lvl(struct i915_address_space *vm,
 			     gen8_pte_count(pd_start, pd_len));
 
 		if (bitmap_empty(pt->used_ptes, GEN8_PTES_PER_PT)) {
-			struct i915_hw_ppgtt *ppgtt =
-				container_of(vm, struct i915_hw_ppgtt, base);
 			trace_i915_pagetable_destroy(vm, pde,
 					pd_start & GENMASK_ULL(64, GEN8_PDE_SHIFT),
@@ -794,7 +794,9 @@ static void gen8_unmap_vma_3lvl(struct i915_address_space *vm,
 			/* This may be nixed later. Optimize? */
 			gen8_unmap_pagetable(ppgtt, pd, pde);
 		} else {
-			gen8_ppgtt_clear_range(vm, pd_start, pd_len, true);
+			gen8_ppgtt_clear_pte_range(ppgtt, pdp,
+						   pd_start, pd_len,
+						   true);
 		}
 	}
 
@@ -809,12 +811,14 @@ static void gen8_unmap_vma_3lvl(struct i915_address_space *vm,
 	}
 
 	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev))) {
-		/* TODO: When pagetables are fully dynamic:
-		   free_pdp_single(pdp, dev); */
+		free_pdp_single(pdp, dev);
 		trace_i915_pagedirpo_destroy(vm, 0,
 					     orig_start & GENMASK_ULL(64, GEN8_PML4E_SHIFT),
 					     GEN8_PML4E_SHIFT);
+		return 1;
 	}
+
+	return 0;
 }
 
 static void gen8_unmap_vma_4lvl(struct i915_address_space *vm,
@@ -824,10 +828,15 @@
 	struct i915_pagedirpo *pdp;
 	uint64_t temp, pml4e;
 
+	BUG_ON(I915_PDPES_PER_PDP(vm->dev) != 512);
 	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
-		gen8_unmap_vma_3lvl(vm, pdp, start, length);
-		if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev)))
+		if (!pdp)
+			continue;
+
+		if (gen8_unmap_vma_3lvl(vm, pdp, start, length)) {
 			clear_bit(pml4e, pml4->used_pml4es);
+			pml4->pdps[pml4e] = NULL;
+		}
 	}
 }
 
@@ -848,6 +857,15 @@ static void gen8_unmap_vma(struct i915_vma *vma)
 	__gen8_teardown_va_range(vma->vm, vma->node.start, vma->node.size);
 }
 
+static void gen8_unmap_aliasing_vma(struct i915_vma *vma)
+{
+	struct drm_device *dev = vma->vm->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	__gen8_teardown_va_range(&dev_priv->mm.aliasing_ppgtt->base,
+				 vma->node.start, vma->node.size);
+}
+
 static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 {
 	trace_i915_va_teardown(&ppgtt->base,
@@ -855,9 +873,14 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 	__gen8_teardown_va_range(&ppgtt->base,
 				 ppgtt->base.start, ppgtt->base.total);
 
-	WARN_ON(!bitmap_empty(ppgtt->pdp.used_pdpes,
-			      I915_PDPES_PER_PDP(ppgtt->base.dev)));
-	free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
+	if (HAS_48B_PPGTT(ppgtt->base.dev)) {
+		WARN_ON(!bitmap_empty(ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4));
+		pml4_fini(&ppgtt->pml4);
+	} else {
+		WARN_ON(!bitmap_empty(ppgtt->pdp.used_pdpes,
+				      I915_PDPES_PER_PDP(ppgtt->base.dev)));
+		free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
+	}
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1036,12 +1059,20 @@ err_out:
 	return -ENOMEM;
 }
 
-static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
-				    struct i915_pagedirpo *pdp,
-				    uint64_t start,
-				    uint64_t length)
+/**
+ * __gen8_alloc_vma_range_3lvl() - Map PDEs for a given range
+ * @
+ *
+ */
+static int __gen8_alloc_vma_range_3lvl(struct i915_pagedirpo *pdp,
+				       struct i915_vma *vma,
+				       struct sg_page_iter *sg_iter,
+				       uint64_t start,
+				       uint64_t length,
+				       u32 flags)
 {
 	unsigned long *new_page_dirs, **new_page_tables;
+	struct i915_address_space *vm = vma->vm;
 	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	const uint64_t orig_start = start;
@@ -1051,6 +1082,8 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	size_t pdpes = I915_PDPES_PER_PDP(dev);
 	int ret;
 
+	BUG_ON(!sg_iter->sg);
+
 #ifdef CONFIG_32BIT
 	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
 	 * this in hardware, but a lot of the drm code is not prepared to handle
@@ -1096,16 +1129,23 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
 			BUG_ON(!pt);
+			BUG_ON(!pd_len);
+			BUG_ON(!gen8_pte_count(pd_start, pd_len));
+			BUG_ON(!sg_iter->__nents);
 
 			bitmap_set(pt->used_ptes,
 				   gen8_pte_index(pd_start),
 				   gen8_pte_count(pd_start, pd_len));
 
+			gen8_ppgtt_insert_pte_entries(pdp, sg_iter, pd_start,
+						      gen8_pte_count(pd_start, pd_len),
+						      flags, !HAS_LLC(vm->dev));
 			set_bit(pde, pd->used_pdes);
 		}
 
 		set_bit(pdpe, pdp->used_pdpes);
+		gen8_map_pagetable_range(vm, pd, start, length);
 
 		gen8_map_page_directory(pdp, pd, pdpe, dev);
 	}
@@ -1126,18 +1166,21 @@ err_out:
 	return ret;
 }
 
-static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
-				    struct i915_pml4 *pml4,
-				    uint64_t start,
-				    uint64_t length)
+static int __gen8_alloc_vma_range_4lvl(struct i915_pml4 *pml4,
+				       struct i915_vma *vma,
+				       struct sg_page_iter *sg_iter,
+				       u32 flags)
 {
 	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+	struct i915_address_space *vm = vma->vm;
 	struct i915_hw_ppgtt *ppgtt =
 			container_of(vm, struct i915_hw_ppgtt, base);
 	struct i915_pagedirpo *pdp;
+	uint64_t start = vma->node.start, length = vma->node.size;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
 	uint64_t temp, pml4e;
+	int ret;
 
 	/* Do the pml4 allocations first, so we don't need to track the newly
 	 * allocated tables below the pdp */
@@ -1168,11 +1211,10 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 	length = orig_length;
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
-		int ret;
-
 		BUG_ON(!pdp);
 
-		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+		ret = __gen8_alloc_vma_range_3lvl(pdp, vma, sg_iter,
+						  start, length, flags);
 		if (ret)
 			goto err_out;
 
@@ -1198,39 +1240,36 @@ err_out:
 err_alloc:
 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
 		free_pdp_single(pdp, vm->dev);
+
+	return ret;
 }
 
-static int __gen8_alloc_va_range(struct i915_address_space *vm,
-				 uint64_t start, uint64_t length)
+static int __gen8_map_vma(struct i915_address_space *vm, struct i915_vma *vma, u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	struct sg_page_iter sg_iter;
 
-	if (HAS_48B_PPGTT(vm->dev))
-		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+	__sg_page_iter_start(&sg_iter, vma->obj->pages->sgl, sg_nents(vma->obj->pages->sgl), 0);
+	if (HAS_48B_PPGTT(vma->vm->dev))
+		return __gen8_alloc_vma_range_4lvl(&ppgtt->pml4, vma, &sg_iter, flags);
 	else
-		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+		return __gen8_alloc_vma_range_3lvl(&ppgtt->pdp, vma, &sg_iter,
+						   vma->node.start,
+						   vma->node.size,
+						   flags);
 }
 
-static int gen8_map_vma(struct i915_vma *vma, u32 flags)
+static int gen8_map_aliasing_vma(struct i915_vma *vma, u32 flags)
 {
-	int ret = __gen8_alloc_va_range(vma->vm, vma->node.start,vma->node.size);
-	if (!ret) {
-		BUG_ON(flags >= I915_CACHE_MAX);
-		gen8_ppgtt_insert_entries(vma->vm, vma->obj->pages, vma->node.start,
-					  flags);
-	}
-
-	return ret;
+	struct drm_device *dev = vma->vm->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	return __gen8_map_vma(&dev_priv->mm.aliasing_ppgtt->base, vma, flags);
 }
 
-static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
+static int gen8_map_vma(struct i915_vma *vma, u32 flags)
 {
-	free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
-	if (HAS_48B_PPGTT(ppgtt->base.dev))
-		pml4_fini(&ppgtt->pml4);
-	else
-		free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
+	return __gen8_map_vma(vma->vm, vma, flags);
 }
 
 /**
@@ -1240,12 +1279,18 @@ static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
  * space.
  *
  */
-static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 {
 	ppgtt->base.start = 0;
-	ppgtt->base.total = size;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->enable = gen8_ppgtt_enable;
+	if (aliasing) {
+		ppgtt->base.map_vma = gen8_map_aliasing_vma;
+		ppgtt->base.unmap_vma = gen8_unmap_aliasing_vma;
+	} else {
+		ppgtt->base.map_vma = gen8_map_vma;
+		ppgtt->base.unmap_vma = gen8_unmap_vma;
+	}
 
 	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
 	if (IS_ERR(ppgtt->scratch_pd))
@@ -1257,6 +1302,7 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 			free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
 			return ret;
 		}
+		ppgtt->base.total = (1ULL<<48);
 
 		ppgtt->switch_mm = gen8_48b_mm_switch;
 	} else {
 		int ret = __pdp_init(&ppgtt->pdp, false);
@@ -1266,61 +1312,13 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 		}
 
 		ppgtt->switch_mm = gen8_legacy_mm_switch;
+		ppgtt->base.total = (1ULL<<32);
 		trace_i915_pagedirpo_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
 	}
 
 	return 0;
 }
 
-static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
-{
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_pagedirpo *pdp = &ppgtt->pdp; /* FIXME: 48b */
-	struct i915_pagedir *pd;
-	uint64_t temp, start = 0, size = dev_priv->gtt.base.total;
-	uint32_t pdpe;
-	int ret;
-
-	ret = gen8_ppgtt_init_common(ppgtt, size);
-	if (ret)
-		return ret;
-
-	ret = __gen8_alloc_va_range(&ppgtt->base, start, size);
-	if (ret) {
-		gen8_ppgtt_fini_common(ppgtt);
-		return ret;
-	}
-
-	/* FIXME: PML4 */
-	gen8_for_each_pdpe(pd, pdp, start, size, temp, pdpe)
-		gen8_map_pagetable_range(&ppgtt->base, pd, start, size);
-
-	BUG(); // we need a map_vma for aliasing
-	ppgtt->base.map_vma = NULL;
-	ppgtt->base.unmap_vma = NULL;
-
-	gen8_ppgtt_clear_range(&ppgtt->base, 0, dev_priv->gtt.base.total, true);
-
-	return 0;
-}
-
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
-{
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
-	if (ret)
-		return ret;
-
-	ppgtt->base.map_vma = gen8_map_vma;
-	ppgtt->base.unmap_vma = gen8_unmap_vma;
-
-	return 0;
-}
-
 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 {
 	struct i915_address_space *vm = &ppgtt->base;
@@ -1687,10 +1685,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		kunmap_atomic(pt_vaddr);
 }
 
-static int gen6_alloc_va_range(struct i915_vma *vma, u32 flags)
+static int _gen6_map_vma(struct i915_address_space *vm,
+			 struct i915_vma *vma, u32 flags)
 {
 	DECLARE_BITMAP(new_page_tables, I915_PDES_PER_PD);
-	struct i915_address_space *vm = vma->vm;
 	struct drm_device *dev = vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt =
@@ -1780,9 +1778,20 @@ unwind_out:
 	return ret;
 }
 
-static void gen6_unmap_vma(struct i915_vma *vma)
+static int gen6_map_aliasing_vma(struct i915_vma *vma, u32 flags)
+{
+	struct drm_i915_private *dev_priv = vma->vm->dev->dev_private;
+	return _gen6_map_vma(&dev_priv->mm.aliasing_ppgtt->base, vma, flags);
+}
+
+static int gen6_map_vma(struct i915_vma *vma, u32 flags)
+{
+	return _gen6_map_vma(vma->vm, vma, flags);
+}
+
+static void _gen6_unmap_vma(struct i915_address_space *vm,
+			    struct i915_vma *vma)
 {
-	struct i915_address_space *vm = vma->vm;
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	uint32_t start = vma->node.start, length = vma->node.size;
@@ -1817,6 +1826,17 @@ static void gen6_unmap_vma(struct i915_vma *vma)
 	gen6_ppgtt_clear_range(vm, orig_start, orig_length, true);
 }
 
+static void gen6_unmap_aliasing_vma(struct i915_vma *vma)
+{
+	struct drm_i915_private *dev_priv = vma->vm->dev->dev_private;
+	_gen6_unmap_vma(&dev_priv->mm.aliasing_ppgtt->base, vma);
+}
+
+static void gen6_unmap_vma(struct i915_vma *vma)
+{
+	_gen6_unmap_vma(vma->vm, vma);
+}
+
 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 {
 	struct i915_pagetab *pt;
@@ -1944,8 +1964,13 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 	if (ret)
 		return ret;
 
-	ppgtt->base.map_vma = gen6_alloc_va_range;
-	ppgtt->base.unmap_vma = gen6_unmap_vma;
+	if (aliasing) {
+		ppgtt->base.map_vma = gen6_map_aliasing_vma;
+		ppgtt->base.unmap_vma = gen6_unmap_aliasing_vma;
+	} else {
+		ppgtt->base.map_vma = gen6_map_vma;
+		ppgtt->base.unmap_vma = gen6_unmap_vma;
+	}
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.start = 0;
 	ppgtt->base.total = I915_PDES_PER_PD * GEN6_PTES_PER_PT * PAGE_SIZE;
@@ -1957,8 +1982,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 	ppgtt->pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
 		ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
 
-	if (!aliasing)
-		gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
+	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
 
 	gen6_map_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
 
@@ -1979,16 +2003,18 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, boo
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt, aliasing);
-	else if (IS_GEN8(dev) && aliasing)
-		ret = gen8_aliasing_ppgtt_init(ppgtt);
 	else if (IS_GEN8(dev))
-		ret = gen8_ppgtt_init(ppgtt);
+		ret = gen8_ppgtt_init(ppgtt, aliasing);
 	else
 		BUG();
 
 	if (ret)
 		return ret;
 
+	BUG_ON(ppgtt->base.total < dev_priv->gtt.base.total && aliasing);
+	if (aliasing)
+		ppgtt->base.total = dev_priv->gtt.base.total;
+
 	kref_init(&ppgtt->ref);
 	drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total);
 	i915_init_vm(dev_priv, &ppgtt->base);
@@ -2450,7 +2476,6 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	    (!obj->has_aliasing_ppgtt_mapping ||
 	     (cache_level != obj->cache_level))) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
-		BUG();
 		appgtt->base.map_vma(vma, cache_level);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
--
1.9.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx