After much hair pulling, we resort to preallocating the ppGTT entries on init to circumvent the apparent lack of a PD invalidate following the write to PP_DCLV upon switching mm between contexts (and here, within the same context after binding new objects). However, the details of that PP_DCLV invalidate are still unknown, and it appears we need to reload the mm twice to paper over a timing issue. Worrying. Fixes: 3dc007fe9b2b ("drm/i915/gtt: Downgrade gen7 (ivb, byt, hsw) back to aliasing-ppgtt") Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- .../gpu/drm/i915/gt/intel_ring_submission.c | 21 ++++++++----------- drivers/gpu/drm/i915/i915_gem_gtt.c | 21 ++++++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_pci.c | 2 +- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index f25ceccb335e..f977fc27b001 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1366,7 +1366,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) const struct intel_engine_cs * const engine = rq->engine; u32 *cs; - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, 10); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1374,6 +1374,12 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); *cs++ = PP_DIR_DCLV_2G; + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + *cs++ = MI_NOOP; + *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; @@ -1579,6 +1585,7 @@ static int switch_context(struct i915_request *rq) { struct 
intel_context *ce = rq->hw_context; struct i915_address_space *vm = vm_alias(ce); + u32 hw_flags = 0; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); @@ -1590,19 +1597,9 @@ static int switch_context(struct i915_request *rq) } if (ce->state) { - u32 hw_flags; - GEM_BUG_ON(rq->engine->id != RCS0); - /* - * The kernel context(s) is treated as pure scratch and is not - * expected to retain any state (as we sacrifice it during - * suspend and on resume it may be corrupted). This is ok, - * as nothing actually executes using the kernel context; it - * is purely used for flushing user contexts. - */ - hw_flags = 0; - if (i915_gem_context_is_kernel(rq->gem_context)) + if (!rq->engine->default_state) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6239a9adbf14..98835fea38a9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1692,7 +1692,6 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, intel_wakeref_t wakeref; u64 from = start; unsigned int pde; - bool flush = false; int ret = 0; wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); @@ -1717,11 +1716,6 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, spin_lock(&pd->lock); if (pd->entry[pde] == &vm->scratch[1]) { pd->entry[pde] = pt; - if (i915_vma_is_bound(ppgtt->vma, - I915_VMA_GLOBAL_BIND)) { - gen6_write_pde(ppgtt, pde, pt); - flush = true; - } } else { alloc = pt; pt = pd->entry[pde]; @@ -1732,9 +1726,19 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, } spin_unlock(&pd->lock); - if (flush) + if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) { + mutex_lock(&ppgtt->flush); + + /* Rewrite them all! Anything less misses an invalidate. 
*/ + gen6_for_all_pdes(pt, pd, pde) + gen6_write_pde(ppgtt, pde, pt); + + ioread32(ppgtt->pd_addr + pde - 1); gen6_ggtt_invalidate(vm->gt->ggtt); + mutex_unlock(&ppgtt->flush); + } + goto out; unwind_out: @@ -1793,6 +1797,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); + mutex_destroy(&ppgtt->flush); mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt->base.pd); } @@ -1958,6 +1963,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) if (!ppgtt) return ERR_PTR(-ENOMEM); + mutex_init(&ppgtt->flush); mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, &i915->gt); @@ -1994,6 +2000,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) err_pd: kfree(ppgtt->base.pd); err_free: + mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 402283ce2864..31a4a96ddd0d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -443,6 +443,7 @@ struct i915_ppgtt { struct gen6_ppgtt { struct i915_ppgtt base; + struct mutex flush; struct i915_vma *vma; gen6_pte_t __iomem *pd_addr; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index da3e9b5752ac..583e0cd94a6a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -436,7 +436,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ - .ppgtt_type = INTEL_PPGTT_ALIASING, \ + .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ -- 2.24.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx