On 31-10-2022 23:16, Matt Roper wrote: > On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote: >> On XE_LPM+ platforms the media engines are carved out into a separate >> GT but have a common GGTMMADR address range which essentially makes >> the GGTT address space to be shared between media and render GT. > > While this is all true, I feel like this description is lacking a bit of > explanation for why/how that translates into the code changes below. > For example you should elaborate on the areas this impacts, such as the > need to invalidate both GTs' TLBs, retire requests for both GTs, etc. > > Also, the movement of the PAT setup should be noted and explained as > well since it differs from how you approached the other changes here. > >> >> BSPEC: 63834 >> >> Cc: Matt Roper <matthew.d.roper@xxxxxxxxx> >> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 49 +++++++++++------- >> drivers/gpu/drm/i915/gt/intel_gt.c | 15 +++++- >> drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 ++ >> drivers/gpu/drm/i915/gt/intel_gtt.h | 3 ++ >> drivers/gpu/drm/i915/i915_driver.c | 19 +++++-- >> drivers/gpu/drm/i915/i915_gem_evict.c | 63 +++++++++++++++++------ >> drivers/gpu/drm/i915/i915_vma.c | 5 +- >> drivers/gpu/drm/i915/selftests/i915_gem.c | 2 + >> drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 + >> 9 files changed, 115 insertions(+), 45 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> index 2518cebbf931..f5c2f3c58627 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >> @@ -196,10 +196,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) >> >> void i915_ggtt_suspend(struct i915_ggtt *ggtt) >> { >> + struct intel_gt *gt; >> + >> i915_ggtt_suspend_vm(&ggtt->vm); >> ggtt->invalidate(ggtt); >> >> - intel_gt_check_and_clear_faults(ggtt->vm.gt); >> + list_for_each_entry(gt, &ggtt->gt_list, 
ggtt_link) >> + intel_gt_check_and_clear_faults(gt); >> } >> >> void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> @@ -214,27 +217,36 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) >> >> static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> - struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> + struct intel_uncore *uncore; >> + struct intel_gt *gt; >> >> - /* >> - * Note that as an uncached mmio write, this will flush the >> - * WCB of the writes into the GGTT before it triggers the invalidate. >> - */ >> - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { >> + uncore = gt->uncore; >> + /* >> + * Note that as an uncached mmio write, this will flush the >> + * WCB of the writes into the GGTT before it triggers the invalidate. >> + */ >> + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); > > This isn't a GT register, so writing it for each GT doesn't do anything > different than just writing it once. But actually it doesn't look like > this is even a register we should be writing to anymore since Xe_HP. > The GFX_FLSH_CNTL register no longer lives here. OK, I'll remove the iteration over the GTs. I also do not see an equivalent register for Xe_HP, so I'll leave this write as it is for now. 
> >> + } >> } >> >> static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) >> { >> - struct intel_uncore *uncore = ggtt->vm.gt->uncore; >> struct drm_i915_private *i915 = ggtt->vm.i915; >> >> gen8_ggtt_invalidate(ggtt); >> >> - if (GRAPHICS_VER(i915) >= 12) >> - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, >> - GEN12_GUC_TLB_INV_CR_INVALIDATE); >> - else >> - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> + if (GRAPHICS_VER(i915) >= 12) { >> + struct intel_gt *gt; >> + >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> + intel_uncore_write_fw(gt->uncore, >> + GEN12_GUC_TLB_INV_CR, >> + GEN12_GUC_TLB_INV_CR_INVALIDATE); >> + } else { >> + intel_uncore_write_fw(ggtt->vm.gt->uncore, >> + GEN8_GTCR, GEN8_GTCR_INVALIDATE); >> + } >> } >> >> u64 gen8_ggtt_pte_encode(dma_addr_t addr, >> @@ -986,8 +998,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) >> >> ggtt->vm.pte_encode = gen8_ggtt_pte_encode; >> >> - setup_private_pat(ggtt->vm.gt); >> - >> return ggtt_probe_common(ggtt, size); >> } >> >> @@ -1186,7 +1196,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) >> (u64)ggtt->mappable_end >> 20); >> drm_dbg(&i915->drm, "DSM size = %lluM\n", >> (u64)resource_size(&intel_graphics_stolen_res) >> 20); >> - >> + INIT_LIST_HEAD(&ggtt->gt_list); >> return 0; >> } >> >> @@ -1296,9 +1306,11 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) >> >> void i915_ggtt_resume(struct i915_ggtt *ggtt) >> { >> + struct intel_gt *gt; >> bool flush; >> >> - intel_gt_check_and_clear_faults(ggtt->vm.gt); >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> + intel_gt_check_and_clear_faults(gt); >> >> flush = i915_ggtt_resume_vm(&ggtt->vm); >> >> @@ -1307,9 +1319,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) >> if (flush) >> wbinvd_on_all_cpus(); >> >> - if (GRAPHICS_VER(ggtt->vm.i915) >= 8) >> - setup_private_pat(ggtt->vm.gt); >> - >> intel_ggtt_restore_fences(ggtt); >> } >> >> diff --git 
a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c >> index 2e796ffad911..d72efb74563a 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt.c >> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c >> @@ -110,9 +110,17 @@ static int intel_gt_probe_lmem(struct intel_gt *gt) >> >> int intel_gt_assign_ggtt(struct intel_gt *gt) >> { >> - gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); >> + /* Media GT shares primary GT's GGTT */ >> + if (gt->type == GT_MEDIA) { >> + gt->ggtt = to_gt(gt->i915)->ggtt; >> + } else { >> + gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); >> + if (!gt->ggtt) >> + return -ENOMEM; >> + } >> >> - return gt->ggtt ? 0 : -ENOMEM; >> + list_add_tail(&gt->ggtt_link, &gt->ggtt->gt_list); >> + return 0; >> } >> >> int intel_gt_init_mmio(struct intel_gt *gt) >> @@ -965,6 +973,9 @@ int intel_gt_tiles_init(struct drm_i915_private *i915) >> int ret; >> >> for_each_gt(gt, i915, id) { >> + if (GRAPHICS_VER(i915) >= 8) >> + setup_private_pat(gt); >> + > > Since the term "tile" is used for PVC-style remote tiles (which we have > some framework for, but haven't enabled yet), it seems confusing to have > the PAT setup for all GTs (including the standalone media GT) in a > function called intel_gt_tiles_init(). Maybe we should also have a prep > patch that renames this function if we're going to start doing non-tile > things in here too? 
I'll move this to i915_driver_hw_probe(). > >> ret = intel_gt_probe_lmem(gt); >> if (ret) >> return ret; >> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h >> index 6f686a4244f0..aee37e9e79b0 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h >> @@ -292,6 +292,9 @@ struct intel_gt { >> struct kobject *sysfs_defaults; >> >> struct i915_perf_gt perf; >> + >> + /** link: &ggtt.gt_list */ >> + struct list_head ggtt_link; >> }; >> >> struct intel_gt_definition { >> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h >> index 4d75ba4bb41d..cb1272702a1a 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h >> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h >> @@ -390,6 +390,9 @@ struct i915_ggtt { >> struct mutex error_mutex; >> struct drm_mm_node error_capture; >> struct drm_mm_node uc_fw; >> + >> + /** List of GTs mapping this GGTT */ >> + struct list_head gt_list; >> }; >> >> struct i915_ppgtt { >> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c >> index c3d43f9b1e45..6b973182ddcc 100644 >> --- a/drivers/gpu/drm/i915/i915_driver.c >> +++ b/drivers/gpu/drm/i915/i915_driver.c >> @@ -577,7 +577,8 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) >> { >> struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); >> struct pci_dev *root_pdev; >> - int ret; >> + struct intel_gt *gt; >> + int ret, i; >> >> if (i915_inject_probe_failure(dev_priv)) >> return -ENODEV; >> @@ -614,9 +615,11 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) >> >> i915_perf_init(dev_priv); >> >> - ret = intel_gt_assign_ggtt(to_gt(dev_priv)); >> - if (ret) >> - goto err_perf; >> + for_each_gt(gt, dev_priv, i) { >> + ret = intel_gt_assign_ggtt(gt); >> + if (ret) >> + goto err_perf; >> + } >> >> ret = i915_ggtt_probe_hw(dev_priv); >> if (ret) >> @@ -1318,7 +1321,8 @@ int 
i915_driver_suspend_switcheroo(struct drm_i915_private *i915, >> static int i915_drm_resume(struct drm_device *dev) >> { >> struct drm_i915_private *dev_priv = to_i915(dev); >> - int ret; >> + struct intel_gt *gt; >> + int ret, i; >> >> disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); >> >> @@ -1333,6 +1337,11 @@ static int i915_drm_resume(struct drm_device *dev) >> drm_err(&dev_priv->drm, "failed to re-enable GGTT\n"); >> >> i915_ggtt_resume(to_gt(dev_priv)->ggtt); >> + >> + for_each_gt(gt, dev_priv, i) >> + if (GRAPHICS_VER(gt->i915) >= 8) >> + setup_private_pat(gt); >> + >> /* Must be called after GGTT is resumed. */ >> intel_dpt_resume(dev_priv); >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c >> index f025ee4fa526..e9d4352ebfb8 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_evict.c >> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c >> @@ -43,16 +43,30 @@ static bool dying_vma(struct i915_vma *vma) >> return !kref_read(&vma->obj->base.refcount); >> } >> >> -static int ggtt_flush(struct intel_gt *gt) >> +static int ggtt_flush(struct i915_address_space *vm) >> { >> - /* >> - * Not everything in the GGTT is tracked via vma (otherwise we >> - * could evict as required with minimal stalling) so we are forced >> - * to idle the GPU and explicitly retire outstanding requests in >> - * the hopes that we can then remove contexts and the like only >> - * bound by their active reference. 
>> - */ >> - return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); >> + int ret = 0; >> + >> + if (i915_is_ggtt(vm)) { >> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); >> + struct intel_gt *gt; >> + >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { >> + /* >> + * Not everything in the GGTT is tracked via vma (otherwise we >> + * could evict as required with minimal stalling) so we are forced >> + * to idle the GPU and explicitly retire outstanding requests in >> + * the hopes that we can then remove contexts and the like only >> + * bound by their active reference. >> + */ >> + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); >> + if (ret) >> + return ret; >> + } >> + } else { >> + ret = intel_gt_wait_for_idle(vm->gt, MAX_SCHEDULE_TIMEOUT); > > This function only ever gets called on the GGTT as far as I can see > (which makes sense given that its name starts with 'ggtt'). It's not > clear to me why we want to handle PPGTT too now? > Thanks for catching this. It is indeed only used for the GGTT, so the i915_is_ggtt() check is not needed. > Even if this is intentional and correct, it might be best to move this > up to a small if statement at the top of the function with a return so > that we can eliminate a level of nesting from most of the function. > > if (!i915_is_ggtt(vm)) { > wait for idle; > return; > } > > ... 
> > >> + } >> + return ret; >> } >> >> static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww) >> @@ -149,6 +163,7 @@ i915_gem_evict_something(struct i915_address_space *vm, >> struct drm_mm_node *node; >> enum drm_mm_insert_mode mode; >> struct i915_vma *active; >> + struct intel_gt *gt; >> int ret; >> >> lockdep_assert_held(&vm->mutex); >> @@ -174,7 +189,14 @@ i915_gem_evict_something(struct i915_address_space *vm, >> min_size, alignment, color, >> start, end, mode); >> >> - intel_gt_retire_requests(vm->gt); >> + if (i915_is_ggtt(vm)) { >> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); >> + >> + list_for_each_entry_rcu(gt, &ggtt->gt_list, ggtt_link) >> + intel_gt_retire_requests(gt); >> + } else { >> + intel_gt_retire_requests(vm->gt); >> + } >> >> search_again: >> active = NULL; >> @@ -246,7 +268,7 @@ i915_gem_evict_something(struct i915_address_space *vm, >> if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) >> return -EBUSY; >> >> - ret = ggtt_flush(vm->gt); >> + ret = ggtt_flush(vm); > > As noted above, this call is always done on GGTT (since we checked and > returned if it wasn't GGTT just before this point in the function). > >> if (ret) >> return ret; >> >> @@ -332,7 +354,15 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, >> * a stray pin (preventing eviction) that can only be resolved by >> * retiring. >> */ >> - intel_gt_retire_requests(vm->gt); >> + if (i915_is_ggtt(vm)) { >> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); >> + struct intel_gt *gt; >> + >> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) >> + intel_gt_retire_requests(gt); >> + } else { >> + intel_gt_retire_requests(vm->gt); >> + } >> >> if (i915_vm_has_cache_coloring(vm)) { >> /* Expand search to cover neighbouring guard pages (or lack!) */ >> @@ -437,11 +467,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) >> * pin themselves inside the global GTT and performing the >> * switch otherwise is ineffective. 
>> */ >> - if (i915_is_ggtt(vm)) { >> - ret = ggtt_flush(vm->gt); >> - if (ret) >> - return ret; >> - } >> + >> + ret = ggtt_flush(vm); >> + if (ret) >> + return ret; > > It's not clear to me why we're removing the GGTT check here? With the above change done, this check will be restored. Thanks, Aravind. > > > Matt > >> >> do { >> struct i915_vma *vma, *vn; >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c >> index c39488eb9eeb..24cbee3c1ce5 100644 >> --- a/drivers/gpu/drm/i915/i915_vma.c >> +++ b/drivers/gpu/drm/i915/i915_vma.c >> @@ -1542,6 +1542,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, >> u32 align, unsigned int flags) >> { >> struct i915_address_space *vm = vma->vm; >> + struct intel_gt *gt; >> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); >> int err; >> >> do { >> @@ -1557,7 +1559,8 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, >> } >> >> /* Unlike i915_vma_pin, we don't take no for an answer! 
*/ >> - flush_idle_contexts(vm->gt); >> + list_for_each_entry_rcu(gt, &ggtt->gt_list, ggtt_link) >> + flush_idle_contexts(gt); >> if (mutex_lock_interruptible(&vm->mutex) == 0) { >> /* >> * We pass NULL ww here, as we don't want to unbind >> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c >> index e5dd82e7e480..2535b9684bd1 100644 >> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c >> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c >> @@ -127,6 +127,8 @@ static void igt_pm_resume(struct drm_i915_private *i915) >> */ >> with_intel_runtime_pm(&i915->runtime_pm, wakeref) { >> i915_ggtt_resume(to_gt(i915)->ggtt); >> + if (GRAPHICS_VER(i915) >= 8) >> + setup_private_pat(to_gt(i915)); >> i915_gem_resume(i915); >> } >> } >> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c >> index 568840e7ca66..b519d271f4fe 100644 >> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c >> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c >> @@ -127,6 +127,7 @@ void mock_init_ggtt(struct intel_gt *gt) >> ggtt->vm.vma_ops.bind_vma = mock_bind_ggtt; >> ggtt->vm.vma_ops.unbind_vma = mock_unbind_ggtt; >> >> + INIT_LIST_HEAD(&ggtt->gt_list); >> i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); >> } >> >> -- >> 2.25.1 >> >