From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx> This finishes off the dynamic page tables allocations, in the legacy 3 level style that already exists. Most everything has already been setup to this point, the patch finishes off the enabling by setting the appropriate function pointers. Zombie tracking: This could be a separate patch, but I found it helpful for debugging. Since we write page tables asynchronously with respect to the GPU using them, we can't actually free the page tables until we know the GPU won't use them. With this patch, that is always when the context dies. It would be possible to write a reaper to go through zombies and clean them up when under memory pressure. That exercise is left for the reader. Scratch unused pages: The object pages can get freed even if a page table still points to them. Like the zombie fix, we need to make sure we don't let our GPU access arbitrary memory when we've unmapped things. v2: Update aliasing/true ppgtt allocate/teardown/clear functions for gen 6 & 7. Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> (v2) --- drivers/gpu/drm/i915/i915_gem_gtt.c | 378 +++++++++++++++++++++++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 16 +- 2 files changed, 327 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a834fa6..7001869 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -602,7 +602,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, } } -static void __gen8_do_map_pt(gen8_ppgtt_pde_t *pde, +static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde, struct i915_pagetab *pt, struct drm_device *dev) { @@ -619,7 +619,7 @@ static void gen8_map_pagetable_range(struct i915_pagedir *pd, uint64_t length, struct drm_device *dev) { - gen8_ppgtt_pde_t *pagedir = kmap_atomic(pd->page); + gen8_ppgtt_pde_t * const pagedir = kmap_atomic(pd->page); struct i915_pagetab *pt; uint64_t temp, pde; @@ -632,8 +632,9 @@ static void gen8_map_pagetable_range(struct i915_pagedir *pd, kunmap_atomic(pagedir); } -static void gen8_teardown_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) +static void __gen8_teardown_va_range(struct i915_address_space *vm, + uint64_t start, uint64_t length, + bool dead) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); @@ -655,6 +656,13 @@ static void gen8_teardown_va_range(struct i915_address_space *vm, pdpe, vm); continue; } else { + if (dead && pd->zombie) { + WARN_ON(test_bit(pdpe, ppgtt->pdp.used_pdpes)); + free_pd_single(pd, vm->dev); + ppgtt->pdp.pagedir[pdpe] = NULL; + continue; + } + WARN(!test_bit(pdpe, ppgtt->pdp.used_pdpes), "PDPE %d not reserved, but is allocated (%p)", pdpe, vm); @@ -666,34 +674,64 @@ static void gen8_teardown_va_range(struct i915_address_space *vm, "PDE %d is not allocated, but is reserved (%p)\n", pde, vm); continue; - } else + } else { + if (dead && pt->zombie) { + WARN_ON(test_bit(pde, pd->used_pdes)); + free_pt_single(pt, vm->dev); + pd->page_tables[pde] = NULL; + continue; + } WARN(!test_bit(pde, pd->used_pdes), "PDE %d not reserved, but is allocated (%p)", pde, vm); + } bitmap_clear(pt->used_ptes, gen8_pte_index(pd_start), gen8_pte_count(pd_start, pd_len)); if (bitmap_empty(pt->used_ptes, GEN8_PTES_PER_PAGE)) { + WARN_ON(!test_and_clear_bit(pde, pd->used_pdes)); + if (!dead) { + pt->zombie = 1; + continue; + } free_pt_single(pt, vm->dev); pd->page_tables[pde] = NULL; - WARN_ON(!test_and_clear_bit(pde, pd->used_pdes)); + } } + gen8_ppgtt_clear_range(vm, pd_start, pd_len, true); + if (bitmap_empty(pd->used_pdes, GEN8_PDES_PER_PAGE)) { + WARN_ON(!test_and_clear_bit(pdpe, ppgtt->pdp.used_pdpes)); + if (!dead) { + /* We've unmapped a possibly live context. Make + * note of it so we can clean it up later. */ + pd->zombie = 1; + continue; + } free_pd_single(pd, vm->dev); ppgtt->pdp.pagedir[pdpe] = NULL; - WARN_ON(!test_and_clear_bit(pdpe, ppgtt->pdp.used_pdpes)); } } } +static void gen8_teardown_va_range(struct i915_address_space *vm, + uint64_t start, uint64_t length) +{ + __gen8_teardown_va_range(vm, start, length, false); +} + static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) { - gen8_teardown_va_range(&ppgtt->base, - ppgtt->base.start, ppgtt->base.total); + trace_i915_va_teardown(&ppgtt->base, + ppgtt->base.start, ppgtt->base.total, + VM_TO_TRACE_NAME(&ppgtt->base)); + __gen8_teardown_va_range(&ppgtt->base, + ppgtt->base.start, ppgtt->base.total, + true); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -704,58 +742,167 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) gen8_ppgtt_free(ppgtt); } -static int gen8_ppgtt_alloc_pagetabs(struct i915_pagedir *pd, +/** + * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. + * @ppgtt: Master ppgtt structure. + * @pd: Page directory for this address range. + * @start: Starting virtual address to begin allocations. + * @length Size of the allocations. + * @new_pts: Bitmap set by function with new allocations. Likely used by the + * caller to free on error. + * + * Allocate the required number of page tables. Extremely similar to + * gen8_ppgtt_alloc_pagedirs(). The main difference is here we are limited by + * the page directory boundary (instead of the page directory pointer). That + * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_pagedirs(), it is + * possible, and likely that the caller will need to use multiple calls of this + * function to achieve the appropriate allocation. + * + * Return: 0 if success; negative error code otherwise. + */ +static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt, + struct i915_pagedir *pd, uint64_t start, uint64_t length, - struct drm_device *dev) + unsigned long *new_pts) { - struct i915_pagetab *unused; + struct i915_pagetab *pt; uint64_t temp; uint32_t pde; - gen8_for_each_pde(unused, pd, start, length, temp, pde) { - BUG_ON(unused); - pd->page_tables[pde] = alloc_pt_single(dev); - if (IS_ERR(pd->page_tables[pde])) + gen8_for_each_pde(pt, pd, start, length, temp, pde) { + /* Don't reallocate page tables */ + if (pt) { + /* Scratch is never allocated this way */ + WARN_ON(pt->scratch); + /* If there is a zombie, we can reuse it and save time + * on the allocation. If we clear the zombie status and + * the caller somehow fails, we'll probably hit some + * assertions, so it's up to them to fix up the bitmaps. + */ + continue; + } + + pt = alloc_pt_single(ppgtt->base.dev); + if (IS_ERR(pt)) goto unwind_out; + + pd->page_tables[pde] = pt; + set_bit(pde, new_pts); } return 0; unwind_out: - while (pde--) - free_pt_single(pd->page_tables[pde], dev); + for_each_set_bit(pde, new_pts, GEN8_PDES_PER_PAGE) + free_pt_single(pd->page_tables[pde], ppgtt->base.dev); return -ENOMEM; } -/* bitmap of new pagedirs */ -static int gen8_ppgtt_alloc_pagedirs(struct i915_pagedirpo *pdp, +/** + * gen8_ppgtt_alloc_pagedirs() - Allocate page directories for VA range. + * @ppgtt: Master ppgtt structure. + * @pdp: Page directory pointer for this address range. + * @start: Starting virtual address to begin allocations. + * @length Size of the allocations. + * @new_pds Bitmap set by function with new allocations. Likely used by the + * caller to free on error. + * + * Allocate the required number of page directories starting at the pde index of + * @start, and ending at the pde index @start + @length. This function will skip + * over already allocated page directories within the range, and only allocate + * new ones, setting the appropriate pointer within the pdp as well as the + * correct position in the bitmap @new_pds. + * + * The function will only allocate the pages within the range for a give page + * directory pointer. In other words, if @start + @length straddles a virtually + * addressed PDP boundary (512GB for 4k pages), there will be more allocations + * required by the caller, This is not currently possible, and the BUG in the + * code will prevent it. + * + * Return: 0 if success; negative error code otherwise. + */ +static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt, + struct i915_pagedirpo *pdp, uint64_t start, uint64_t length, - struct drm_device *dev) + unsigned long *new_pds) { - struct i915_pagedir *unused; + struct i915_pagedir *pd; uint64_t temp; uint32_t pdpe; + BUG_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES)); + /* FIXME: PPGTT container_of won't work for 64b */ BUG_ON((start + length) > 0x800000000ULL); - gen8_for_each_pdpe(unused, pdp, start, length, temp, pdpe) { - BUG_ON(unused); - pdp->pagedir[pdpe] = alloc_pd_single(dev); + gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { + if (pd) + continue; - if (IS_ERR(pdp->pagedir[pdpe])) + pd = alloc_pd_single(ppgtt->base.dev); + if (IS_ERR(pd)) goto unwind_out; + + pdp->pagedir[pdpe] = pd; + set_bit(pdpe, new_pds); } return 0; unwind_out: - while (pdpe--) - free_pd_single(pdp->pagedir[pdpe], dev); + for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES) + free_pd_single(pdp->pagedir[pdpe], ppgtt->base.dev); + + return -ENOMEM; +} + +static inline void +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts) +{ + int i; + for (i = 0; i < GEN8_LEGACY_PDPES; i++) + kfree(new_pts[i]); + kfree(new_pts); + kfree(new_pds); +} + +/* Fills in the page directory bitmap, ant the array of page tables bitmap. Both + * of these are based on the number of PDPEs in the system. + */ +int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, + unsigned long ***new_pts) +{ + int i; + unsigned long *pds; + unsigned long **pts; + pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL); + if (!pds) + return -ENOMEM; + + pts = kcalloc(GEN8_PDES_PER_PAGE, sizeof(unsigned long *), GFP_KERNEL); + if (!pts) { + kfree(pds); + return -ENOMEM; + } + + for (i = 0; i < GEN8_LEGACY_PDPES; i++) { + pts[i] = kcalloc(BITS_TO_LONGS(GEN8_PDES_PER_PAGE), + sizeof(unsigned long), GFP_KERNEL); + if (!pts[i]) + goto err_out; + } + + *new_pds = pds; + *new_pts = (unsigned long **)pts; + + return 0; + +err_out: + free_gen8_temp_bitmaps(pds, pts); return -ENOMEM; } @@ -765,6 +912,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); + unsigned long *new_page_dirs, **new_page_tables; struct i915_pagedir *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; @@ -772,43 +920,103 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, uint32_t pdpe; int ret; - /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_pagedirs(&ppgtt->pdp, start, length, - ppgtt->base.dev); +#ifndef CONFIG_64BIT + /* Disallow 64b address on 32b platforms. Nothing is wrong with doing + * this in hardware, but a lot of the drm code is not prepared to handle + * 64b offset on 32b platforms. */ + if (start + length > 0x100000000ULL) + return -E2BIG; +#endif + + /* Wrap is never okay since we can only represent 48b, and we don't + * actually use the other side of the canonical address space. + */ + if (WARN_ON(start + length < start)) + return -ERANGE; + + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables); if (ret) return ret; + /* Do the allocations first so we can easily bail out */ + ret = gen8_ppgtt_alloc_pagedirs(ppgtt, &ppgtt->pdp, start, length, + new_page_dirs); + if (ret) { + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + return ret; + } + + /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(pd, start, length, - ppgtt->base.dev); + bitmap_zero(new_page_tables[pdpe], GEN8_PDES_PER_PAGE); + ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length, + new_page_tables[pdpe]); if (ret) goto err_out; } - /* Now mark everything we've touched as used. This doesn't allow for - * robust error checking, but it makes the code a hell of a lot simpler. - */ start = orig_start; length = orig_length; + /* Allocations have completed successfully, so set the bitmaps, and do + * the mappings. */ gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) { + gen8_ppgtt_pde_t *const pagedir = kmap_atomic(pd->page); struct i915_pagetab *pt; uint64_t pd_len = gen8_clamp_pd(start, length); uint64_t pd_start = start; uint32_t pde; - gen8_for_each_pde(pt, &ppgtt->pd, pd_start, pd_len, temp, pde) { - bitmap_set(pd->page_tables[pde]->used_ptes, - gen8_pte_index(start), - gen8_pte_count(start, length)); + + /* Every pd should be allocated, we just did that above. */ + BUG_ON(!pd); + + gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) { + /* Same reasoning as pd */ + BUG_ON(!pt); + BUG_ON(!pd_len); + BUG_ON(!gen8_pte_count(pd_start, pd_len)); + + /* Set our used ptes within the page table */ + bitmap_set(pt->used_ptes, + gen8_pte_index(pd_start), + gen8_pte_count(pd_start, pd_len)); + + /* Our pde is now pointing to the pagetable, pt */ set_bit(pde, pd->used_pdes); + + /* Map the PDE to the page table */ + __gen8_do_map_pt(pagedir + pde, pt, vm->dev); + + /* NB: We haven't yet mapped ptes to pages. At this + * point we're still relying on insert_entries() */ + + /* No longer possible this page table is a zombie */ + pt->zombie = 0; } + + if (!HAS_LLC(vm->dev)) + drm_clflush_virt_range(pagedir, PAGE_SIZE); + + kunmap_atomic(pagedir); + set_bit(pdpe, ppgtt->pdp.used_pdpes); + /* This pd is officially not a zombie either */ + ppgtt->pdp.pagedir[pdpe]->zombie = 0; } + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); return 0; err_out: - gen8_teardown_va_range(vm, orig_start, start); + while (pdpe--) { + for_each_set_bit(temp, new_page_tables[pdpe], GEN8_PDES_PER_PAGE) + free_pt_single(pd->page_tables[temp], vm->dev); + } + + for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES) + free_pd_single(ppgtt->pdp.pagedir[pdpe], vm->dev); + + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); return ret; } @@ -819,37 +1027,68 @@ err_out: * space. * */ -static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) +static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) { - struct i915_pagedir *pd; - uint64_t temp, start = 0; - const uint64_t orig_length = size; - uint32_t pdpe; - int ret; + ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev); + if (IS_ERR(ppgtt->scratch_pd)) + return PTR_ERR(ppgtt->scratch_pd); ppgtt->base.start = 0; ppgtt->base.total = size; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; + ppgtt->switch_mm = gen8_mm_switch; - ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev); - if (IS_ERR(ppgtt->scratch_pd)) - return PTR_ERR(ppgtt->scratch_pd); + return 0; +} +static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_pagedir *pd; + uint64_t temp, start = 0, size = dev_priv->gtt.base.total; + uint32_t pdpe; + int ret; + + ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total); + if (ret) + return ret; + + /* Aliasing PPGTT has to always work and be mapped because of the way we + * use RESTORE_INHIBIT in the context switch. This will be fixed + * eventually. */ ret = gen8_alloc_va_range(&ppgtt->base, start, size); if (ret) { free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev); return ret; } - start = 0; - size = orig_length; - gen8_for_each_pdpe(pd, &ppgtt->pdp, start, size, temp, pdpe) gen8_map_pagetable_range(pd, start, size, ppgtt->base.dev); + ppgtt->base.allocate_va_range = NULL; + ppgtt->base.teardown_va_range = NULL; + ppgtt->base.clear_range = gen8_ppgtt_clear_range; + + return 0; +} + +static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total); + if (ret) + return ret; + + ppgtt->base.allocate_va_range = gen8_alloc_va_range; + ppgtt->base.teardown_va_range = gen8_teardown_va_range; + ppgtt->base.clear_range = NULL; + return 0; } @@ -1399,9 +1638,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; - ppgtt->base.teardown_va_range = gen6_teardown_va_range; - ppgtt->base.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.allocate_va_range = aliasing ? NULL : gen6_alloc_va_range; + ppgtt->base.teardown_va_range = aliasing ? NULL : gen6_teardown_va_range; + ppgtt->base.clear_range = aliasing ? gen6_ppgtt_clear_range : NULL; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->base.start = 0; @@ -1439,8 +1678,10 @@ static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, if (INTEL_INFO(dev)->gen < 8) return gen6_ppgtt_init(ppgtt, aliasing); + else if ((IS_GEN8(dev) || IS_GEN9(dev)) && aliasing) + return gen8_aliasing_ppgtt_init(ppgtt); else if (IS_GEN8(dev) || IS_GEN9(dev)) - return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); + return gen8_ppgtt_init(ppgtt); else BUG(); } @@ -1454,8 +1695,9 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); - ppgtt->base.clear_range(&ppgtt->base, 0, - ppgtt->base.total, true); + if (ppgtt->base.clear_range) + ppgtt->base.clear_range(&ppgtt->base, 0, + ppgtt->base.total, true); i915_init_vm(dev_priv, &ppgtt->base); } @@ -1577,10 +1819,7 @@ ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, - vma->node.start, - vma->obj->base.size, - true); + WARN_ON(vma->vm->teardown_va_range && vma->vm->clear_range); if (vma->vm->teardown_va_range) { trace_i915_va_teardown(vma->vm, vma->node.start, vma->node.size, @@ -1589,7 +1828,14 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) vma->vm->teardown_va_range(vma->vm, vma->node.start, vma->node.size); ppgtt_invalidate_tlbs(vma->vm); - } + } else if (vma->vm->clear_range) { + vma->vm->clear_range(vma->vm, + vma->node.start, + vma->obj->base.size, + true); + } else + BUG(); + } extern int intel_iommu_gfx_mapped; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9b6caac..5446592 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -190,13 +190,26 @@ struct i915_vma { u32 flags); }; - +/* Zombies. We write page tables with the CPU, and hardware switches them with + * the GPU. As such, the only time we can safely remove a page table is when we + * know the context is idle. Since we have no good way to do this, we use the + * zombie. + * + * Under memory pressure, if the system is idle, zombies may be reaped. + * + * There are 3 states a page table can be in (not including scratch) + * bitmap = 0, zombie = 0: unallocated + * bitmap = 1, zombie = 0: allocated + * bitmap = 0, zombie = 1: zombie + * bitmap = 1, zombie = 1: invalid + */ struct i915_pagetab { struct page *page; dma_addr_t daddr; unsigned long *used_ptes; unsigned int scratch:1; + unsigned zombie:1; }; struct i915_pagedir { @@ -208,6 +221,7 @@ struct i915_pagedir { unsigned long *used_pdes; struct i915_pagetab *page_tables[GEN6_PPGTT_PD_ENTRIES]; + unsigned zombie:1; }; struct i915_pagedirpo { -- 2.1.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx