On Mon, Mar 06, 2023 at 08:33:04AM +0000, Gupta, Anshuman wrote: > > > > -----Original Message----- > > From: Nilawar, Badal <badal.nilawar@xxxxxxxxx> > > Sent: Saturday, March 4, 2023 9:48 PM > > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > Cc: Gupta, Anshuman <anshuman.gupta@xxxxxxxxx>; Ewins, Jon > > <jon.ewins@xxxxxxxxx>; Belgaumkar, Vinay <vinay.belgaumkar@xxxxxxxxx>; > > Vivi, Rodrigo <rodrigo.vivi@xxxxxxxxx> > > Subject: [PATCH] drm/i915/mtl: Extend Wa_14017073508 in suspend/resume > > flow > > > > During suspend resume flow it was seen that lots forcewakes are taken and > > released for media. So to avoid HANG due to MC6 entry/exit while accessing > > media applied Wa_14017073508 in suspend/resume flow. > We need to fix such gaps in runtime PM as well, but that will make this WA dirty somewhat similar to other RC6 related WA on other platforms. > I suggest to disable the MC6 rather on A step rather than having this form of WA. This WA is not needed on A step. > @rodrigo and @matt what is you opinion here ? My opinion since day-0 is that we should simply disable MC6 on A0 parts and move on with this feature only on B0. Since the beginning it was pretty clear that this workaround would get out of control and become the same as the PVC-RC6 one. > > However some comment her to improve this patch. > > > > > Signed-off-by: Badal Nilawar <badal.nilawar@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/gem/i915_gem_pm.c | 6 ++++- > > drivers/gpu/drm/i915/gt/intel_gt_pm.c | 32 ++++++++++++++++-------- > > drivers/gpu/drm/i915/gt/intel_gt_pm.h | 3 +++ > > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + > > drivers/gpu/drm/i915/i915_driver.c | 32 ++++++++++++++++++++++++ > > 5 files changed, 62 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > b/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > index 0d812f4d787d..60deac41104d 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > @@ -160,8 +160,12 @@ void i915_gem_suspend_late(struct > > drm_i915_private *i915) > > * machine in an unusable condition. > > */ > > > > - for_each_gt(gt, i915, i) > > + for_each_gt(gt, i915, i) { > > + /* Wa_14017073508: mtl */ > > + mtl_media_busy(gt); > Let's add these busy/idle in intel_gt_suspend_late() > > intel_gt_suspend_late(gt); > > + mtl_media_idle(gt); > > + } > > > > spin_lock_irqsave(&i915->mm.obj_lock, flags); > > for (phase = phases; *phase; phase++) { diff --git > > a/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > index cef3d6f5c34e..1f3bf1cf0421 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > @@ -26,24 +26,34 @@ > > > > #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) > > > > -static void mtl_media_busy(struct intel_gt *gt) > > +void mtl_media_busy(struct intel_gt *gt) > > { > > /* Wa_14017073508: mtl */ > > - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && > > + if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) > > && > > gt->type == GT_MEDIA) > Let's have this in a separate patch , I believe that is already floated and ready to merge. > > - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, > > - PCODE_MBOX_GT_STATE_MEDIA_BUSY, > > - > > PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); > > + if (!atomic_inc_not_zero(>->media_wakeref)) { > > + drm_err(>->i915->drm, "Media busy\n"); > This should not be Error message. > > + snb_pcode_write_p(gt->uncore, > > PCODE_MBOX_GT_STATE, > > + > > PCODE_MBOX_GT_STATE_MEDIA_BUSY, > > + > > PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); > > + atomic_inc(>->media_wakeref); > > + } > > } > > > > -static void mtl_media_idle(struct intel_gt *gt) > > +void mtl_media_idle(struct intel_gt *gt) > > { > > /* Wa_14017073508: mtl */ > > - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && > > - gt->type == GT_MEDIA) > > - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, > > - > > PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY, > > - > > PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); > > + if (gt && IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) > > && > > + gt->type == GT_MEDIA) { > > + WARN_ON(atomic_read(>->media_wakeref) <= 0); > > + if (!atomic_add_unless(>->media_wakeref, -1, 1)) { > > + drm_err(>->i915->drm, "Media idle\n"); > Same here this can't be error message. > Thanks, > Anshuman Gupta. > > + snb_pcode_write_p(gt->uncore, > > PCODE_MBOX_GT_STATE, > > + > > PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY, > > + > > PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); > > + atomic_dec(>->media_wakeref); > > + } > > + } > > } > > > > static void user_forcewake(struct intel_gt *gt, bool suspend) diff --git > > a/drivers/gpu/drm/i915/gt/intel_gt_pm.h > > b/drivers/gpu/drm/i915/gt/intel_gt_pm.h > > index 6c9a46452364..43ffabf9babe 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h > > @@ -89,4 +89,7 @@ static inline bool is_mock_gt(const struct intel_gt *gt) > > return I915_SELFTEST_ONLY(gt->awake == -ENODEV); } > > > > +void mtl_media_busy(struct intel_gt *gt); void mtl_media_idle(struct > > +intel_gt *gt); > > + > > #endif /* INTEL_GT_PM_H */ > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h > > b/drivers/gpu/drm/i915/gt/intel_gt_types.h > > index f08c2556aa25..321ccfef4028 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > > @@ -145,6 +145,7 @@ struct intel_gt { > > > > struct intel_wakeref wakeref; > > atomic_t user_wakeref; > > + atomic_t media_wakeref; > > > > struct list_head closed_vma; > > spinlock_t closed_lock; /* guards the list of closed_vma */ diff --git > > a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c > > index 8bc76dede332..c7625ea28022 100644 > > --- a/drivers/gpu/drm/i915/i915_driver.c > > +++ b/drivers/gpu/drm/i915/i915_driver.c > > @@ -327,6 +327,8 @@ static int i915_driver_mmio_probe(struct > > drm_i915_private *dev_priv) > > intel_gmch_bar_setup(dev_priv); > > intel_device_info_runtime_init(dev_priv); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_busy(dev_priv->media_gt); > > for_each_gt(gt, dev_priv, i) { > > ret = intel_gt_init_mmio(gt); > > if (ret) > > @@ -335,10 +337,14 @@ static int i915_driver_mmio_probe(struct > > drm_i915_private *dev_priv) > > > > /* As early as possible, scrub existing GPU state before clobbering */ > > sanitize_gpu(dev_priv); > > + /* Wa_14017073508: mtl */ > > + mtl_media_idle(dev_priv->media_gt); > > > > return 0; > > > > err_uncore: > > + /* Wa_14017073508: mtl */ > > + mtl_media_idle(dev_priv->media_gt); > > intel_gmch_bar_teardown(dev_priv); > > > > return ret; > > @@ -781,6 +787,12 @@ int i915_driver_probe(struct pci_dev *pdev, const > > struct pci_device_id *ent) > > if (ret < 0) > > goto out_tiles_cleanup; > > > > + /* Wa_14017073508: mtl */ > > + if (i915->media_gt) { > > + atomic_set(&i915->media_gt->media_wakeref, 0); > > + mtl_media_busy(i915->media_gt); > > + } > > + > > ret = i915_driver_hw_probe(i915); > > if (ret < 0) > > goto out_cleanup_mmio; > > @@ -838,6 +850,8 @@ int i915_driver_probe(struct pci_dev *pdev, const > > struct pci_device_id *ent) > > i915_gem_drain_freed_objects(i915); > > i915_ggtt_driver_late_release(i915); > > out_cleanup_mmio: > > + /* Wa_14017073508: mtl */ > > + mtl_media_idle(i915->media_gt); > > i915_driver_mmio_release(i915); > > out_tiles_cleanup: > > intel_gt_release_all(i915); > > @@ -1063,6 +1077,9 @@ static int i915_drm_suspend(struct drm_device > > *dev) > > > > disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_busy(dev_priv->media_gt); > > + > > /* We do a lot of poking in a lot of registers, make sure they work > > * properly. */ > > intel_power_domains_disable(dev_priv); > > @@ -1097,6 +1114,9 @@ static int i915_drm_suspend(struct drm_device > > *dev) > > > > intel_dmc_suspend(dev_priv); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_busy(dev_priv->media_gt); > > + > > enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); > > > > i915_gem_drain_freed_objects(dev_priv); > > @@ -1197,6 +1217,9 @@ static int i915_drm_resume(struct drm_device > > *dev) > > > > disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_busy(dev_priv->media_gt); > > + > > ret = i915_pcode_init(dev_priv); > > if (ret) > > return ret; > > @@ -1260,6 +1283,9 @@ static int i915_drm_resume(struct drm_device > > *dev) > > > > intel_gvt_resume(dev_priv); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_idle(dev_priv->media_gt); > > + > > enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); > > > > return 0; > > @@ -1319,6 +1345,9 @@ static int i915_drm_resume_early(struct > > drm_device *dev) > > > > disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_busy(dev_priv->media_gt); > > + > > ret = vlv_resume_prepare(dev_priv, false); > > if (ret) > > drm_err(&dev_priv->drm, > > @@ -1333,6 +1362,9 @@ static int i915_drm_resume_early(struct > > drm_device *dev) > > > > intel_power_domains_resume(dev_priv); > > > > + /* Wa_14017073508: mtl */ > > + mtl_media_idle(dev_priv->media_gt); > > + > > enable_rpm_wakeref_asserts(&dev_priv->runtime_pm); > > > > return ret; > > -- > > 2.25.1 >