Re: [Intel-gfx] [PATCH] drm/i915: Improve on suspend / resume time with VT-d enabled

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 5 Apr 2022 at 13:00, Thomas Hellström
<thomas.hellstrom@xxxxxxxxxxxxxxx> wrote:
>
> When DMAR / VT-d is enabled, the display engine uses overfetching,
> presumably to deal with the increased latency. To avoid display engine
> errors and DMAR faults, as a workaround the GGTT is populated with scatch
> PTEs when VT-d is enabled. However starting with gen10, Write-combined
> writing of scratch PTES is no longer possible and as a result, populating
> the full GGTT with scratch PTEs like on resume becomes very slow as
> uncached access is needed.
>
> Therefore, on integrated GPUs utilize the fact that the PTEs are stored in
> stolen memory which retain content across S3 suspend. Don't clear the PTEs
> on suspend and resume. This improves on resume time with around 100 ms.
> While 100+ms might appear like a short time it's 10% to 20% of total resume
> time and important in some applications.
>
> One notable exception is Intel Rapid Start Technology which may cause
> stolen memory to be lost across what the OS percieves as S3 suspend.
> If IRST is enabled or if we can't detect whether IRST is enabled, retain
> the old workaround, clearing and re-instating PTEs.
>
> As an additional measure, if we detect that the last ggtt pte was lost
> during suspend, print a warning and re-populate the GGTT ptes
>
> On discrete GPUs, the display engine scans out from LMEM which isn't
> subject to DMAR, and presumably the workaround is therefore not needed,
> but that needs to be verified and disabling the workaround for dGPU,
> if possible, will be deferred to a follow-up patch.
>
> v2:
> - Rely on retained ptes to also speed up suspend and resume re-binding.
> - Re-build GGTT ptes if Intel rst is enabled.
> v3:
> - Re-build GGTT ptes also if we can't detect whether Intel rst is enabled,
>   and if the guard page PTE and end of GGTT was lost.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>

It's not great, but I think it's better than building another
complication into our allocator just to handle this all.

Acked-by: Daniel Vetter <daniel.vetter@xxxxxxxx>

> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 56 +++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 20 ++++++++++
>  drivers/gpu/drm/i915/i915_driver.c   | 16 ++++++++
>  3 files changed, 86 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 04191fe2ee34..98441b1c1b75 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -23,6 +23,13 @@
>  #include "intel_gtt.h"
>  #include "gen8_ppgtt.h"
>
> +static inline bool suspend_retains_ptes(struct i915_address_space *vm)
> +{
> +       return GRAPHICS_VER(vm->i915) >= 8 &&
> +               !HAS_LMEM(vm->i915) &&
> +               vm->is_ggtt;
> +}
> +
>  static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
>                                    unsigned long color,
>                                    u64 *start,
> @@ -116,6 +123,23 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
>         return false;
>  }
>
> +/*
> + * Return the value of the last GGTT pte cast to an u64, if
> + * the system is supposed to retain ptes across resume. 0 otherwise.
> + */
> +static u64 read_last_pte(struct i915_address_space *vm)
> +{
> +       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> +       gen8_pte_t __iomem *ptep;
> +
> +       if (!suspend_retains_ptes(vm))
> +               return 0;
> +
> +       GEM_BUG_ON(GRAPHICS_VER(vm->i915) < 8);
> +       ptep = (typeof(ptep))ggtt->gsm + (ggtt_total_entries(ggtt) - 1);
> +       return readq(ptep);
> +}
> +
>  /**
>   * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
>   * @vm: The VM to suspend the mappings for
> @@ -179,7 +203,10 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
>                 i915_gem_object_unlock(obj);
>         }
>
> -       vm->clear_range(vm, 0, vm->total);
> +       if (!suspend_retains_ptes(vm))
> +               vm->clear_range(vm, 0, vm->total);
> +       else
> +               i915_vm_to_ggtt(vm)->probed_pte = read_last_pte(vm);
>
>         vm->skip_pte_rewrite = save_skip_rewrite;
>
> @@ -578,6 +605,8 @@ static int init_ggtt(struct i915_ggtt *ggtt)
>         struct drm_mm_node *entry;
>         int ret;
>
> +       ggtt->pte_lost = true;
> +
>         /*
>          * GuC requires all resources that we're sharing with it to be placed in
>          * non-WOPCM memory. If GuC is not present or not in use we still need a
> @@ -1309,11 +1338,20 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
>  {
>         struct i915_vma *vma;
>         bool write_domain_objs = false;
> +       bool retained_ptes;
>
>         drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
>
> -       /* First fill our portion of the GTT with scratch pages */
> -       vm->clear_range(vm, 0, vm->total);
> +       /*
> +        * First fill our portion of the GTT with scratch pages if
> +        * they were not retained across suspend.
> +        */
> +       retained_ptes = suspend_retains_ptes(vm) &&
> +               !i915_vm_to_ggtt(vm)->pte_lost &&
> +               !GEM_WARN_ON(i915_vm_to_ggtt(vm)->probed_pte != read_last_pte(vm));
> +
> +       if (!retained_ptes)
> +               vm->clear_range(vm, 0, vm->total);
>
>         /* clflush objects bound into the GGTT and rebind them. */
>         list_for_each_entry(vma, &vm->bound_list, vm_link) {
> @@ -1322,9 +1360,10 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
>                         atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
>
>                 GEM_BUG_ON(!was_bound);
> -               vma->ops->bind_vma(vm, NULL, vma->resource,
> -                                  obj ? obj->cache_level : 0,
> -                                  was_bound);
> +               if (!retained_ptes)
> +                       vma->ops->bind_vma(vm, NULL, vma->resource,
> +                                          obj ? obj->cache_level : 0,
> +                                          was_bound);
>                 if (obj) { /* only used during resume => exclusive access */
>                         write_domain_objs |= fetch_and_zero(&obj->write_domain);
>                         obj->read_domains |= I915_GEM_DOMAIN_GTT;
> @@ -1352,3 +1391,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
>
>         intel_ggtt_restore_fences(ggtt);
>  }
> +
> +void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val)
> +{
> +       to_gt(i915)->ggtt->pte_lost = val;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 4529b5e9f6e6..7561672c4f17 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -345,6 +345,15 @@ struct i915_ggtt {
>
>         bool do_idle_maps;
>
> +       /**
> +        * Whether the system was recently restored from hibernate and
> +        * thus may have lost pte content.
> +        */
> +       bool pte_lost;
> +
> +       /** Probed pte value on suspend. Re-checked on resume. */
> +       u64 probed_pte;
> +
>         int mtrr;
>
>         /** Bit 6 swizzling required for X tiling */
> @@ -571,6 +580,17 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm);
>  void i915_ggtt_suspend(struct i915_ggtt *gtt);
>  void i915_ggtt_resume(struct i915_ggtt *ggtt);
>
> +/**
> + * i915_ggtt_mark_pte_lost - Mark ggtt ptes as lost or clear such a marking
> + * @i915 The device private.
> + * @val whether the ptes should be marked as lost.
> + *
> + * In some cases pte content is retained across suspend, but typically lost
> + * across hibernate. Typically they should be marked as lost on
> + * hibernation restore and such marking cleared on suspend.
> + */
> +void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val);
> +
>  void
>  fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
>
> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 64e6f76861f9..f50256e4c2d2 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -98,6 +98,9 @@
>  #include "intel_region_ttm.h"
>  #include "vlv_suspend.h"
>
> +/* Intel Rapid Start Technology ACPI device name */
> +static const char irst_name[] = "INT3392";
> +
>  static const struct drm_driver i915_drm_driver;
>
>  static int i915_get_bridge_dev(struct drm_i915_private *dev_priv)
> @@ -1425,6 +1428,8 @@ static int i915_pm_suspend(struct device *kdev)
>                 return -ENODEV;
>         }
>
> +       i915_ggtt_mark_pte_lost(i915, false);
> +
>         if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF)
>                 return 0;
>
> @@ -1477,6 +1482,14 @@ static int i915_pm_resume(struct device *kdev)
>         if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF)
>                 return 0;
>
> +       /*
> +        * If IRST is enabled, or if we can't detect whether it's enabled,
> +        * then we must assume we lost the GGTT page table entries, since
> +        * they are not retained if IRST decided to enter S4.
> +        */
> +       if (!IS_ENABLED(CONFIG_ACPI) || acpi_dev_present(irst_name, NULL, -1))
> +               i915_ggtt_mark_pte_lost(i915, true);
> +
>         return i915_drm_resume(&i915->drm);
>  }
>
> @@ -1536,6 +1549,9 @@ static int i915_pm_restore_early(struct device *kdev)
>
>  static int i915_pm_restore(struct device *kdev)
>  {
> +       struct drm_i915_private *i915 = kdev_to_i915(kdev);
> +
> +       i915_ggtt_mark_pte_lost(i915, true);
>         return i915_pm_resume(kdev);
>  }
>
> --
> 2.34.1
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux