If we attempt to wake up a waiter, who is currently checking the seqno it will be in the TASK_INTERRUPTIBLE state and ttwu will report success. However, it is actually awake and functioning -- so delay reporting the actual wake up until it sleeps. This fixes some spurious claims of missed_breadcrumbs when running under heavy load; i.e. sufficient load to preempt away the newly woken waiter before they complete their checks. However, it does so at the cost of a rare false negative; where the waiter changes between the check and ttwu -- the only way to fix that would be to extend the reporting from ttwu where the check could be done atomically. v2: Defend against !CONFIG_SMP v3: Don't filter out calls to wake_up_process Testcase: igt/drv_missed_irq # sanity check we do detect missed_breadcrumb() Testcase: igt/gem_concurrent_blit # for generating false positives References: https://bugs.freedesktop.org/show_bug.cgi?id=100007 Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 39 ++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 24c6fefdd0b1..76e6f8e7cfd4 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -27,6 +27,12 @@ #include "i915_drv.h" +#ifdef CONFIG_SMP +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_cpu) +#else +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL) +#endif + static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) { struct intel_wait *wait; @@ -36,8 +42,20 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) wait = b->irq_wait; if (wait) { + /* + * N.B. Since task_asleep() and ttwu are not atomic, the + * waiter may actually go to sleep after the check, causing + * us to suppress a valid wakeup. We prefer to reduce the + * number of false positive missed_breadcrumb() warnings + * at the expense of a few false negatives, as it it easy + * to trigger a false positive under heavy load. Enough + * signal should remain from genuine missed_breadcrumb() + * for us to detect in CI. + */ + bool was_asleep = task_asleep(wait->tsk); + result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk)) + if (wake_up_process(wait->tsk) && was_asleep) result |= ENGINE_WAKEUP_ASLEEP; } @@ -47,12 +65,15 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - unsigned int result; + unsigned int result = 0; - spin_lock_irqsave(&b->irq_lock, flags); - result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->irq_lock, flags); + if (READ_ONCE(b->irq_wait)) { + unsigned long flags; + + spin_lock_irqsave(&b->irq_lock, flags); + result = __intel_breadcrumbs_wakeup(b); + spin_unlock_irqrestore(&b->irq_lock, flags); + } return result; } @@ -77,8 +98,8 @@ static noinline void missed_breadcrumb(struct intel_engine_cs *engine) static void intel_breadcrumbs_hangcheck(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.hangcheck); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; if (!b->irq_armed) @@ -104,7 +125,7 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) */ if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { missed_breadcrumb(engine); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + mod_timer(&b->fake_irq, jiffies + 1); } else { mod_timer(&b->hangcheck, wait_timeout()); } -- 2.15.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx