During hangcheck we access the hardware registers, for which we must hold a runtime pm reference. Hangcheck should also only be running whilst the GPU is active, and we hold a runtime pm reference whilst the GPU is busy. Therefore, if the runtime pm is suspended (no wakelocks held anywhere) we know the GPU is already idle and we can skip the hangcheck (and all further hangchecks until the next request is submitted to the GPU, waking it up). Currently, hangcheck relies upon being flushed during intel_runtime_suspend(), but that flush happens too late, causing invalid hardware access whilst the device is being suspended. By taking an explicit wakelock (albeit only if already awake) inside hangcheck we can remove the synchronous cancellation from the suspend function. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93121 Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Imre Deak <imre.deak@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_irq.c | 9 +++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 13 +++++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e6935f1cb689..ec5ccaad021d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1475,7 +1475,6 @@ static int intel_runtime_suspend(struct device *device) return ret; } - cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); intel_uncore_forcewake_reset(dev, false); dev_priv->pm.suspended = true; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e88d692583a5..24a4cf45f282 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2989,6 +2989,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!i915.enable_hangcheck) return; + /* If the runtime pm is off, then the GPU is asleep and 
we are + * completely idle, so we can belatedly cancel hangcheck. Hangcheck + * will be restarted on the next request. + */ + if (!intel_runtime_pm_tryget(dev_priv)) + return; + for_each_ring(ring, dev_priv, i) { u64 acthd; u32 seqno; @@ -3080,6 +3087,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) } } + intel_runtime_pm_put(dev_priv); + if (rings_hung) return i915_handle_error(dev, true, "Ring hung"); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8963a8a53b0b..158e53d1ba26 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1428,6 +1428,7 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); void intel_runtime_pm_get(struct drm_i915_private *dev_priv); +bool intel_runtime_pm_tryget(struct drm_i915_private *dev_priv); void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); void intel_runtime_pm_put(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 2c2151f1c47e..5e03c9791239 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2233,6 +2233,19 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) WARN(dev_priv->pm.suspended, "Device still suspended.\n"); } +bool intel_runtime_pm_tryget(struct drm_i915_private *dev_priv) +{ + /* XXX Ideally we would push this to pm_runtime_tryget() */ +#ifdef CONFIG_PM + if (HAS_RUNTIME_PM(dev_priv)) { + struct device *device = &dev_priv->dev->pdev->dev; + return atomic_inc_unless_zero(&dev->power.usage_count); + } +#else + return true; +#endif +} + /** * intel_runtime_pm_get_noresume - grab a runtime pm reference * @dev_priv: i915 device instance -- 2.6.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx