Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > If a worker requeues itself, it may switch to a different kworker pool, > which flush_work() considers as complete. To be strict, we then need to > keep flushing the work until it is no longer pending. > > References: https://bugs.freedesktop.org/show_bug.cgi?id=102456 > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 3 +-- > drivers/gpu/drm/i915/i915_gem.c | 3 +-- > drivers/gpu/drm/i915/i915_utils.h | 13 +++++++++++++ > 3 files changed, 15 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 48572b157222..1dd687a74879 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -4202,8 +4202,7 @@ fault_irq_set(struct drm_i915_private *i915, > mutex_unlock(&i915->drm.struct_mutex); > > /* Flush idle worker to disarm irq */ > - while (flush_delayed_work(&i915->gt.idle_work)) > - ; > + drain_delayed_work(&i915->gt.idle_work); > > return 0; > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index e4cc08bc518c..1829d3fa15d1 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -4582,8 +4582,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) > /* As the idle_work is rearming if it detects a race, play safe and > * repeat the flush until it is definitely idle. > */ > - while (flush_delayed_work(&dev_priv->gt.idle_work)) > - ; > + drain_delayed_work(&dev_priv->gt.idle_work); > > /* Assert that we sucessfully flushed all the work and > * reset the GPU back to its idle, low power state. 
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h > index 12fc250b47b9..4f7ffa0976b1 100644 > --- a/drivers/gpu/drm/i915/i915_utils.h > +++ b/drivers/gpu/drm/i915/i915_utils.h > @@ -119,4 +119,17 @@ static inline void __list_del_many(struct list_head *head, > WRITE_ONCE(head->next, first); > } > > +/* > + * Wait until the work is finally complete, even if it tries to postpone > + * by requeueing itself. Note, that if the worker never cancels itself, > + * we will spin forever. > + */ > +static inline void drain_delayed_work(struct delayed_work *dw) > +{ > + do { > + while (flush_delayed_work(dw)) > + ; > + } while (delayed_work_pending(dw)); So we end up spinning if someone doesn't let go of the mutex. Add a timeout for doing this and let it slide into suspend with an error message anyway instead of spinning forever? -Mika > +} > + > #endif /* !__I915_UTILS_H */ > -- > 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx