Replace the racy continuation check within retire_work with a definite kill-switch on idling. The race was being exposed by gem_concurrent_blit where the retire_worker would be terminated too early leaving us spinning in debugfs/i915_drop_caches with nothing flushing the retirement queue. Although that the igt is trying to idle from one child while submitting from another may be a contributing factor as to why it runs so slowly... Testcase: igt/gem_concurrent_blit Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy") Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_pm.c | 27 ++++++++++--------- .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c index 3b6e8d5be8e1..88be810758ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -46,15 +46,23 @@ static void idle_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, typeof(*i915), gem.idle_work.work); + bool restart = true; + cancel_delayed_work_sync(&i915->gem.retire_work); mutex_lock(&i915->drm.struct_mutex); intel_wakeref_lock(&i915->gt.wakeref); - if (!intel_wakeref_active(&i915->gt.wakeref)) + if (!intel_wakeref_active(&i915->gt.wakeref)) { i915_gem_park(i915); + restart = false; + } intel_wakeref_unlock(&i915->gt.wakeref); mutex_unlock(&i915->drm.struct_mutex); + if (restart) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); } static void retire_work_handler(struct work_struct *work) @@ -68,10 +76,9 @@ static void retire_work_handler(struct work_struct *work) mutex_unlock(&i915->drm.struct_mutex); } - if (intel_wakeref_active(&i915->gt.wakeref)) - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); } static int pm_notifier(struct notifier_block *nb, @@ -159,15 +166,9 @@ void i915_gem_suspend(struct drm_i915_private *i915) * reset the GPU back to its idle, low power state. */ GEM_BUG_ON(i915->gt.awake); - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - drain_delayed_work(&i915->gem.retire_work); + flush_delayed_work(&i915->gem.idle_work); - /* - * As the idle_work is rearming if it detects a race, play safe and - * repeat the flush until it is definitely idle. - */ - drain_delayed_work(&i915->gem.idle_work); + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); i915_gem_drain_freed_objects(i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e4033d0576c4..ce54f8dc13cc 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -58,8 +58,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_lost(i915); mutex_unlock(&i915->drm.struct_mutex); - drain_delayed_work(&i915->gem.retire_work); - drain_delayed_work(&i915->gem.idle_work); + flush_delayed_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx