The legacy gen6 ppgtt needs a little more hand-holding than gen8+, and so requires a larger structure. As I intend to make this slightly more complicated in the future, separate the gen6 ppgtt from the core gen8 hw struct by subclassing. This patch moves the gen6-only features out to gen6_hw_ppgtt and pipes the new type everywhere that needs it. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Cc: Matthew Auld <matthew.william.auld@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 104 +++++++++--------- drivers/gpu/drm/i915/i915_gem_gtt.h | 21 +++- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 - 5 files changed, 75 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index db43962397de..79547b81e28b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1636,20 +1636,20 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) return ERR_PTR(err); } -static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) +static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m) { - struct i915_address_space *vm = &ppgtt->vm; + struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base); + struct i915_address_space *vm = &base->vm; struct i915_page_table *unused; gen6_pte_t scratch_pte; u32 pd_entry, pte, pde; - u32 start = 0, length = ppgtt->vm.total; scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); - gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { + gen6_for_all_pdes(unused, &base->pd, pde) { u32 expected; gen6_pte_t *pt_vaddr; - const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); + const dma_addr_t pt_addr = px_dma(base->pd.page_table[pde]); pd_entry = readl(ppgtt->pd_addr + 
pde); expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); @@ -1660,7 +1660,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); + pt_vaddr = kmap_atomic_px(base->pd.page_table[pde]); for (pte = 0; pte < GEN6_PTES; pte+=4) { unsigned long va = @@ -1688,7 +1688,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } /* Write pde (index) from the page directory @pd to the page table @pt */ -static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, +static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt, const unsigned int pde, const struct i915_page_table *pt) { @@ -1699,26 +1699,27 @@ static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ -static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, +static void gen6_write_page_range(struct i915_hw_ppgtt *base, u32 start, u32 length) { + struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base); struct i915_page_table *pt; unsigned int pde; - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) + gen6_for_each_pde(pt, &base->pd, start, length, pde) gen6_write_pde(ppgtt, pde, pt); - mark_tlbs_dirty(ppgtt); + mark_tlbs_dirty(base); wmb(); } -static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline u32 get_pd_offset(struct gen6_hw_ppgtt *ppgtt) { - GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - return ppgtt->pd.base.ggtt_offset << 10; + GEM_BUG_ON(ppgtt->base.pd.base.ggtt_offset & 0x3f); + return ppgtt->base.pd.base.ggtt_offset << 10; } -static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, +static int hsw_mm_switch(struct gen6_hw_ppgtt *ppgtt, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -1740,7 +1741,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } 
-static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, +static int gen7_mm_switch(struct gen6_hw_ppgtt *ppgtt, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -1762,7 +1763,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, +static int gen6_mm_switch(struct gen6_hw_ppgtt *ppgtt, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -1904,27 +1905,27 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, static int gen6_alloc_va_range(struct i915_address_space *vm, u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); struct i915_page_table *pt; u64 from = start; unsigned int pde; bool flush = false; - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { + gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) { if (pt == vm->scratch_pt) { pt = alloc_pt(vm); if (IS_ERR(pt)) goto unwind_out; gen6_initialize_pt(vm, pt); - ppgtt->pd.page_table[pde] = pt; + ppgtt->base.pd.page_table[pde] = pt; gen6_write_pde(ppgtt, pde, pt); flush = true; } } if (flush) { - mark_tlbs_dirty(ppgtt); + mark_tlbs_dirty(&ppgtt->base); wmb(); } @@ -1962,24 +1963,23 @@ static void gen6_free_scratch(struct i915_address_space *vm) static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory *pd = &ppgtt->pd; + struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); struct i915_page_table *pt; u32 pde; drm_mm_remove_node(&ppgtt->node); - gen6_for_all_pdes(pt, pd, pde) + gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) if (pt != vm->scratch_pt) free_pt(vm, pt); gen6_free_scratch(vm); } -static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) +static int gen6_ppgtt_allocate_page_directories(struct gen6_hw_ppgtt *ppgtt) { - struct i915_address_space 
*vm = &ppgtt->vm; - struct drm_i915_private *dev_priv = ppgtt->vm.i915; + struct i915_address_space *vm = &ppgtt->base.vm; + struct drm_i915_private *dev_priv = ppgtt->base.vm.i915; struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; @@ -2004,11 +2004,11 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); - ppgtt->pd.base.ggtt_offset = + ppgtt->base.pd.base.ggtt_offset = ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + - ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); + ppgtt->base.pd.base.ggtt_offset / sizeof(gen6_pte_t); return 0; @@ -2017,35 +2017,35 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) return ret; } -static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) +static int gen6_ppgtt_alloc(struct gen6_hw_ppgtt *ppgtt) { return gen6_ppgtt_allocate_page_directories(ppgtt); } -static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, +static void gen6_scratch_va_range(struct gen6_hw_ppgtt *ppgtt, u64 start, u64 length) { struct i915_page_table *unused; u32 pde; - gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) - ppgtt->pd.page_table[pde] = ppgtt->vm.scratch_pt; + gen6_for_each_pde(unused, &ppgtt->base.pd, start, length, pde) + ppgtt->base.pd.page_table[pde] = ppgtt->base.vm.scratch_pt; } static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) { struct i915_ggtt * const ggtt = &i915->ggtt; - struct i915_hw_ppgtt *ppgtt; + struct gen6_hw_ppgtt *ppgtt; int err; - ppgtt = kmalloc(sizeof(*ppgtt), GFP_KERNEL); + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); if (!ppgtt) return ERR_PTR(-ENOMEM); - ppgtt->vm.i915 = i915; - ppgtt->vm.dma = &i915->drm.pdev->dev; + ppgtt->base.vm.i915 = i915; + ppgtt->base.vm.dma = &i915->drm.pdev->dev; - ppgtt->vm.pte_encode = ggtt->vm.pte_encode; + ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; 
if (intel_vgpu_active(i915) || IS_GEN6(i915)) ppgtt->switch_mm = gen6_mm_switch; else if (IS_HASWELL(i915)) @@ -2059,36 +2059,36 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) if (err) goto err_free; - ppgtt->vm.total = I915_PDES * GEN6_PTES * PAGE_SIZE; + ppgtt->base.vm.total = I915_PDES * GEN6_PTES * PAGE_SIZE; - gen6_scratch_va_range(ppgtt, 0, ppgtt->vm.total); - gen6_write_page_range(ppgtt, 0, ppgtt->vm.total); + gen6_scratch_va_range(ppgtt, 0, ppgtt->base.vm.total); + gen6_write_page_range(&ppgtt->base, 0, ppgtt->base.vm.total); - err = gen6_alloc_va_range(&ppgtt->vm, 0, ppgtt->vm.total); + err = gen6_alloc_va_range(&ppgtt->base.vm, 0, ppgtt->base.vm.total); if (err) goto err_cleanup; - ppgtt->vm.clear_range = gen6_ppgtt_clear_range; - ppgtt->vm.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->vm.cleanup = gen6_ppgtt_cleanup; - ppgtt->debug_dump = gen6_dump_ppgtt; + ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; + ppgtt->base.debug_dump = gen6_dump_ppgtt; - ppgtt->vm.vma_ops.bind_vma = gen6_ppgtt_bind_vma; - ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; - ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; - ppgtt->vm.vma_ops.clear_pages = clear_pages; + ppgtt->base.vm.vma_ops.bind_vma = gen6_ppgtt_bind_vma; + ppgtt->base.vm.vma_ops.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.vm.vma_ops.set_pages = ppgtt_set_pages; + ppgtt->base.vm.vma_ops.clear_pages = clear_pages; DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", - ppgtt->pd.base.ggtt_offset << 10); + ppgtt->base.pd.base.ggtt_offset << 10); - return ppgtt; + return &ppgtt->base; err_cleanup: - gen6_ppgtt_cleanup(&ppgtt->vm); + gen6_ppgtt_cleanup(&ppgtt->base.vm); err_free: kfree(ppgtt); return ERR_PTR(err); diff --git 
a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fb296962de95..c0eb60484eb4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -395,7 +395,7 @@ struct i915_ggtt { struct i915_hw_ppgtt { struct i915_address_space vm; struct kref ref; - struct drm_mm_node node; + unsigned long pd_dirty_rings; union { struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ @@ -403,13 +403,26 @@ struct i915_hw_ppgtt { struct i915_page_directory pd; /* GEN6-7 */ }; + void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); +}; + +struct gen6_hw_ppgtt { + struct i915_hw_ppgtt base; + + struct drm_mm_node node; gen6_pte_t __iomem *pd_addr; - int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq); - void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); + int (*switch_mm)(struct gen6_hw_ppgtt *ppgtt, struct i915_request *rq); }; +#define __to_gen6_ppgtt(base) container_of(base, struct gen6_hw_ppgtt, base) + +static inline struct gen6_hw_ppgtt *to_gen6_ppgtt(struct i915_hw_ppgtt *base) +{ + BUILD_BUG_ON(offsetof(struct gen6_hw_ppgtt, base)); + return __to_gen6_ppgtt(base); +} + /* * gen6_for_each_pde() iterates over every pde from start until start+length. 
* If start and start+length are not perfectly divisible, the macro will round diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index fa517a3e3c25..082e18a251b5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1547,10 +1547,10 @@ static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *to_ctx = rq->gem_context; - struct i915_hw_ppgtt *to_mm = - to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + struct gen6_hw_ppgtt *to_mm = + to_gen6_ppgtt(to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt); struct i915_gem_context *from_ctx = engine->legacy_active_context; - struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; + struct gen6_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; u32 hw_flags = 0; int ret, i; @@ -1558,13 +1558,13 @@ static int switch_context(struct i915_request *rq) GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (to_mm != from_mm || - (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { + (to_mm && intel_engine_flag(engine) & to_mm->base.pd_dirty_rings)) { trace_switch_mm(engine, to_ctx); ret = to_mm->switch_mm(to_mm, rq); if (ret) goto err; - to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); + to_mm->base.pd_dirty_rings &= ~intel_engine_flag(engine); engine->legacy_active_ppgtt = to_mm; hw_flags = MI_FORCE_RESTORE; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index acef385c4c80..83cf55e44c08 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -564,7 +564,7 @@ struct intel_engine_cs { * stream (ring). 
*/ struct i915_gem_context *legacy_active_context; - struct i915_hw_ppgtt *legacy_active_ppgtt; + struct gen6_hw_ppgtt *legacy_active_ppgtt; /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f80cf7ce3fa9..538e658252f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -142,10 +142,6 @@ static int igt_ppgtt_alloc(void *arg) if (!USES_PPGTT(dev_priv)) return 0; - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return -ENOMEM; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = __hw_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { -- 2.17.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx