This transitional patch doesn't do much for the existing code. However, it
should make the upcoming patches that use the full 48b address space a bit
easier to swallow. The patch also introduces the PML4, i.e. the new top-level
structure of the page tables.

Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h     |   5 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 122 +++++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  40 +++++++++---
 drivers/gpu/drm/i915/i915_trace.h   |  16 +++++
 4 files changed, 151 insertions(+), 32 deletions(-)
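
Orientation note, not part of the patch (it sits in the comment area below the
"---" marker, so it is not applied): the shifts added in i915_gem_gtt.h imply
the usual x86-style 4-level split of a 48b PPGTT address — bits 47:39 select a
PML4 entry, 38:30 a PDP entry, 29:21 a PDE, 20:12 a PTE, with a 4KB page offset
below that. The standalone sketch below only illustrates that split;
decode_gen8_va() and GEN8_PTE_SHIFT are names made up for the example (the
driver itself uses gen8_pdpe_index()/gen8_pde_index()/gen8_pte_index() and a
12-bit page offset).

#include <stdint.h>
#include <stdio.h>

/* Shift values matching the patch (i915_gem_gtt.h). */
#define GEN8_PML4E_SHIFT 39  /* bits 47:39 -> PML4, 512 entries          */
#define GEN8_PDPE_SHIFT  30  /* bits 38:30 -> PDP, 512 (or 4) entries    */
#define GEN8_PDE_SHIFT   21  /* bits 29:21 -> page directory, 512 PDEs   */
#define GEN8_PTE_SHIFT   12  /* bits 20:12 -> page table (made-up name)  */

/* Illustration only: break a 48b GPU virtual address into table indices. */
static void decode_gen8_va(uint64_t va)
{
        unsigned pml4e = (va >> GEN8_PML4E_SHIFT) & 0x1ff;
        unsigned pdpe  = (va >> GEN8_PDPE_SHIFT) & 0x1ff; /* 0x3 on the legacy 32b layout */
        unsigned pde   = (va >> GEN8_PDE_SHIFT) & 0x1ff;
        unsigned pte   = (va >> GEN8_PTE_SHIFT) & 0x1ff;

        printf("pml4e=%u pdpe=%u pde=%u pte=%u offset=0x%llx\n",
               pml4e, pdpe, pde, pte, (unsigned long long)(va & 0xfff));
}

int main(void)
{
        decode_gen8_va(0x0000123456789abcULL); /* arbitrary example address */
        return 0;
}

With this patch HAS_48B_PPGTT() still evaluates to false everywhere, so
I915_PDPES_PER_PDP(dev) stays at 4 and only the legacy 4-entry PDP path is
wired up; the PML4 level is declared but not yet used.
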
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 29bf034..4d53728 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1920,6 +1920,11 @@ struct drm_i915_cmd_table {
 #define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
 #define USES_PPGTT(dev)		intel_enable_ppgtt(dev, false)
 #define USES_FULL_PPGTT(dev)	intel_enable_ppgtt(dev, true)
+#ifdef CONFIG_32BIT
+# define HAS_48B_PPGTT(dev)	false
+#else
+# define HAS_48B_PPGTT(dev)	(IS_BROADWELL(dev) && false)
+#endif
 
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4d01d4e..df3cd41 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -402,6 +402,45 @@ free_pd:
 	return ERR_PTR(ret);
 }
 
+static void __pdp_fini(struct i915_pagedirpo *pdp)
+{
+	kfree(pdp->used_pdpes);
+	kfree(pdp->pagedirs);
+	/* HACK */
+	pdp->pagedirs = NULL;
+}
+
+static void free_pdp_single(struct i915_pagedirpo *pdp,
+			    struct drm_device *dev)
+{
+	__pdp_fini(pdp);
+	if (HAS_48B_PPGTT(dev))
+		kfree(pdp);
+}
+
+static int __pdp_init(struct i915_pagedirpo *pdp,
+		      struct drm_device *dev)
+{
+	size_t pdpes = I915_PDPES_PER_PDP(dev);
+
+	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
+				  sizeof(unsigned long),
+				  GFP_KERNEL);
+	if (!pdp->used_pdpes)
+		return -ENOMEM;
+
+	pdp->pagedirs = kcalloc(pdpes, sizeof(*pdp->pagedirs), GFP_KERNEL);
+	if (!pdp->pagedirs) {
+		kfree(pdp->used_pdpes);
+		/* the PDP might be the statically allocated top level. Keep it
+		 * as clean as possible */
+		pdp->used_pdpes = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_ring_buffer *ring,
 			  unsigned entry,
@@ -440,7 +479,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 {
 	int i, ret;
 
-	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
+	for (i = 3; i >= 0; i--) {
 		struct i915_pagedir *pd = ppgtt->pdp.pagedirs[i];
 		dma_addr_t addr = pd ? pd->daddr : ppgtt->scratch_pt->daddr;
 		/* The page directory might be NULL, but we need to clear out
@@ -514,9 +553,6 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	pt_vaddr = NULL;
 
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
-		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
-			break;
-
 		if (pt_vaddr == NULL) {
 			struct i915_pagedir *pd = ppgtt->pdp.pagedirs[pdpe];
 			struct i915_pagetab *pt = pd->page_tables[pde];
@@ -605,10 +641,16 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	struct i915_pagetab *pt;
 	uint64_t temp;
-	uint32_t pdpe, pde;
+	uint32_t pdpe, pde, orig_start = start;
+
+	if (!ppgtt->pdp.pagedirs) {
+		/* If pagedirs are already free, there is nothing to do.*/
+		return;
+	}
 
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
 		uint64_t pd_len = gen8_clamp_pd(start, length);
@@ -653,7 +695,7 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
 					pde,
 					pd_start & GENMASK_ULL(64, GEN8_PDE_SHIFT),
 					GEN8_PDE_SHIFT);
-			free_pt_single(pt, vm->dev);
+			free_pt_single(pt, dev);
 			/* This may be nixed later. Optimize? */
 			gen8_unmap_pagetable(ppgtt, pd, pde);
 		} else {
@@ -662,7 +704,7 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
 		}
 
 		if (bitmap_empty(pd->used_pdes, I915_PDES_PER_PD)) {
-			free_pd_single(pd, vm->dev);
+			free_pd_single(pd, dev);
 			ppgtt->pdp.pagedirs[pdpe] = NULL;
 			trace_i915_pagedirectory_destroy(vm, pdpe,
 					start & GENMASK_ULL(64, GEN8_PDPE_SHIFT),
@@ -670,6 +712,14 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
 			WARN_ON(!test_and_clear_bit(pdpe, ppgtt->pdp.used_pdpes));
 		}
 	}
+
+	if (bitmap_empty(ppgtt->pdp.used_pdpes, I915_PDPES_PER_PDP(dev))) {
+		/* TODO: When pagetables are fully dynamic:
+		   free_pdp_single(&ppgtt->pdp, dev); */
+		trace_i915_pagedirpo_destroy(vm, 0,
+				orig_start & GENMASK_ULL(64, GEN8_PML4E_SHIFT),
+				GEN8_PML4E_SHIFT);
+	}
 }
 
 static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
@@ -678,6 +728,10 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 			       ppgtt->base.start, ppgtt->base.total);
 	gen8_teardown_va_range(&ppgtt->base,
 			       ppgtt->base.start, ppgtt->base.total);
+
+	WARN_ON(!bitmap_empty(ppgtt->pdp.used_pdpes,
+			      I915_PDPES_PER_PDP(ppgtt->base.dev)));
+	free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -771,11 +825,13 @@ static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt,
 				     uint64_t length,
 				     unsigned long *new_pds)
 {
+	struct drm_device *dev = ppgtt->base.dev;
 	struct i915_pagedir *unused;
 	uint64_t temp;
 	uint32_t pdpe;
+	size_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
 
-	BUG_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+	BUG_ON(!bitmap_empty(new_pds, pdpes));
 
 	/* FIXME: PPGTT container_of won't work for 64b */
 	BUG_ON((start + length) > 0x800000000ULL);
@@ -798,17 +854,18 @@ static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 
 unwind_out:
-	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+	for_each_set_bit(pdpe, new_pds, pdpes)
 		free_pd_single(pdp->pagedirs[pdpe], ppgtt->base.dev);
 
 	return -ENOMEM;
 }
 
 void free_gen8_temp_bitmaps(unsigned long *new_pds,
-			    unsigned long **new_pts)
+			    unsigned long **new_pts,
+			    size_t pdpes)
 {
 	int i;
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+	for (i = 0; i < pdpes; i++)
 		kfree(new_pts[i]);
 	kfree(new_pts);
 	kfree(new_pds);
@@ -818,10 +875,11 @@ void free_gen8_temp_bitmaps(unsigned long *new_pds,
  * of these are based on the number of PDPEs in the system.
  */
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 unsigned long ***new_pts)
+					 unsigned long ***new_pts,
+					 size_t pdpes)
 {
 	int i;
-	unsigned long *pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES),
+	unsigned long *pds = kcalloc(BITS_TO_LONGS(pdpes),
 				     sizeof(unsigned long),
 				     GFP_KERNEL);
 
@@ -832,7 +890,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
 	if (!pts || !pds)
 		goto err_out;
 
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+	for (i = 0; i < pdpes; i++) {
 		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES_PER_PD),
 				 sizeof(unsigned long), GFP_KERNEL);
 		if (!pts[i])
 			goto err_out;
@@ -845,7 +903,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
 	return 0;
 
 err_out:
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+	for (i = 0; i < pdpes; i++)
 		kfree(pts[i]);
 	kfree(pds);
 	kfree(pts);
@@ -859,11 +917,13 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	unsigned long *new_page_dirs, **new_page_tables;
+	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
 	uint64_t temp;
 	uint32_t pdpe;
+	size_t pdpes = I915_PDPES_PER_PDP(dev);
 	int ret;
 
 #ifdef CONFIG_32BIT
@@ -880,7 +940,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	if (WARN_ON(start + length < start))
 		return -ERANGE;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
 	if (ret)
 		return ret;
 
@@ -888,7 +948,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	ret = gen8_ppgtt_alloc_pagedirs(ppgtt, &ppgtt->pdp, start, length,
 					new_page_dirs);
 	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
 		return ret;
 	}
 
@@ -926,7 +986,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 		gen8_map_pagetable_range(vm, pd, start, length);
 	}
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
 	return 0;
 
 err_out:
@@ -935,13 +995,19 @@ err_out:
 		free_pt_single(pd->page_tables[temp], ppgtt->base.dev);
 	}
 
-	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
+	for_each_set_bit(pdpe, new_page_dirs, pdpes)
 		free_pd_single(ppgtt->pdp.pagedirs[pdpe], ppgtt->base.dev);
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
 	return ret;
 }
 
+static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
+{
+	free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+	free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
+}
+
 /**
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -955,13 +1021,24 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	ppgtt->base.total = size;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->enable = gen8_ppgtt_enable;
-	ppgtt->switch_mm = gen8_mm_switch;
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 
 	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
 	if (IS_ERR(ppgtt->scratch_pd))
 		return PTR_ERR(ppgtt->scratch_pd);
 
+	if (!HAS_48B_PPGTT(ppgtt->base.dev)) {
+		int ret = __pdp_init(&ppgtt->pdp, false);
+		if (ret) {
+			free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+			return ret;
+		}
+
+		ppgtt->switch_mm = gen8_mm_switch;
+		trace_i915_pagedirpo_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
+	} else
+		BUG(); /* Not yet implemented */
+
 	return 0;
 }
 
@@ -980,7 +1057,7 @@ static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
 	if (ret) {
-		free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+		gen8_ppgtt_fini_common(ppgtt);
 		return ret;
 	}
 
@@ -2023,6 +2100,7 @@ void gen8_for_every_pdpe_pde(struct i915_hw_ppgtt *ppgtt,
 					  void *data),
 			     void *data)
 {
+	struct drm_device *dev = ppgtt->base.dev;
 	uint64_t start = ppgtt->base.start;
 	uint64_t length = ppgtt->base.total;
 	uint64_t pdpe, pde, temp;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b3d0776..94c825e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -88,7 +88,6 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
 #define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
 
-#define GEN8_LEGACY_PDPES		4
 #define GEN8_PTES_PER_PT		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 
 /* GEN8 legacy style address is defined as a 3 level page table:
@@ -97,8 +96,17 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 * The difference as compared to normal x86 3 level page table is the PDPEs are
 * programmed via register.
 */
+#ifndef CONFIG_32BIT
+# define I915_PDPES_PER_PDP(dev)	(HAS_48B_PPGTT(dev) ? 512 : 4)
+#else
+# define I915_PDPES_PER_PDP		4
+#endif
+#define GEN8_PML4ES_PER_PML4		512
+#define GEN8_PML4E_SHIFT		39
 #define GEN8_PDPE_SHIFT			30
-#define GEN8_PDPE_MASK			0x3
+/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
+ * tables */
+#define GEN8_PDPE_MASK			0x1ff
 #define GEN8_PDE_SHIFT			21
 
 #define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
@@ -195,9 +203,17 @@ struct i915_pagedir {
 };
 
 struct i915_pagedirpo {
-	/* struct page *page; */
-	DECLARE_BITMAP(used_pdpes, GEN8_LEGACY_PDPES);
-	struct i915_pagedir *pagedirs[GEN8_LEGACY_PDPES];
+	struct page *page;
+	dma_addr_t daddr;
+	unsigned long *used_pdpes;
+	struct i915_pagedir **pagedirs;
+};
+
+struct i915_pml4 {
+	struct page *page;
+	dma_addr_t daddr;
+	DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
+	struct i915_pagedirpo *pdps[GEN8_PML4ES_PER_PML4];
 };
 
 struct i915_address_space {
@@ -263,8 +279,9 @@ struct i915_hw_ppgtt {
 	struct kref ref;
 	struct drm_mm_node node;
 	union {
-		struct i915_pagedirpo pdp;
-		struct i915_pagedir pd;
+		struct i915_pml4 pml4;		/* GEN8+ & 64b PPGTT */
+		struct i915_pagedirpo pdp;	/* GEN8+ */
+		struct i915_pagedir pd;		/* GEN6-7 */
 	};
 
 	union {
@@ -411,14 +428,17 @@ static inline size_t gen6_pde_count(uint32_t addr, uint32_t length)
 	     temp = min(temp, length), \
 	     start += temp, length -= temp)
 
-#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \
-	for (iter = gen8_pdpe_index(start), pd = (pdp)->pagedirs[iter]; \
-	     length > 0 && iter < GEN8_LEGACY_PDPES; \
+#define gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, b) \
+	for (iter = gen8_pdpe_index(start), pd = (pdp)->pagedirs[iter]; \
+	     length > 0 && (iter < b); \
 	     pd = (pdp)->pagedirs[++iter], \
 	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start, \
 	     temp = min(temp, length), \
 	     start += temp, length -= temp)
 
+#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \
+	gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
+
 /* Clamp length to the next pagetab boundary */
 static inline uint64_t gen8_clamp_pt(uint64_t start, uint64_t length)
 {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 49f2389..17b8059 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -233,6 +233,22 @@ DEFINE_EVENT_PRINT(i915_pagetable, i915_pagedirectory_destroy,
 		  __entry->vm, __entry->pde, __entry->start, __entry->end)
 );
 
+DEFINE_EVENT_PRINT(i915_pagetable, i915_pagedirpo_alloc,
+	TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift),
+	TP_ARGS(vm, pml4e, start, pml4e_shift),
+
+	TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)",
+		  __entry->vm, __entry->pde, __entry->start, __entry->end)
+);
+
+DEFINE_EVENT_PRINT(i915_pagetable, i915_pagedirpo_destroy,
+	TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift),
+	TP_ARGS(vm, pml4e, start, pml4e_shift),
+
+	TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)",
+		  __entry->vm, __entry->pde, __entry->start, __entry->end)
+);
+
 /* Avoid extra math because we only support two sizes. The format is defined by
  * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
 #define TRACE_PT_SIZE(bits) \
-- 
1.9.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx