On Sun, Jul 10, 2016 at 07:11:24PM +0530, akash.goel@xxxxxxxxx wrote: > @@ -1707,8 +1692,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) > I915_READ(SOFT_SCRATCH(15)) & ~msg); > > /* Handle flush interrupt event in bottom half */ > - queue_work(dev_priv->guc.log.wq, > - &dev_priv->guc.events_work); > + smp_store_mb(dev_priv->guc.log.flush_signal, 1); > + wake_up_process(dev_priv->guc.log.flush_task); > } > +void guc_cancel_log_flush_work_sync(struct drm_i915_private *dev_priv) > +{ > + spin_lock_irq(&dev_priv->irq_lock); > + dev_priv->guc.log.flush_signal = false; > + spin_unlock_irq(&dev_priv->irq_lock); > + > + if (dev_priv->guc.log.flush_task) > + wait_for_completion(&dev_priv->guc.log.flush_completion); > +} > + > +static int guc_log_flush_worker(void *arg) > +{ > + struct drm_i915_private *dev_priv = arg; > + struct intel_guc *guc = &dev_priv->guc; > + > + /* Install ourselves with high priority to reduce signalling latency */ > + struct sched_param param = { .sched_priority = 1 }; > + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); > + > + do { > + set_current_state(TASK_INTERRUPTIBLE); > + > + spin_lock_irq(&dev_priv->irq_lock); > + if (guc->log.flush_signal) { > + guc->log.flush_signal = false; > + reinit_completion(&guc->log.flush_completion); > + spin_unlock_irq(&dev_priv->irq_lock); > + i915_guc_capture_logs(&dev_priv->drm); > + complete_all(&guc->log.flush_completion); > + } else { > + spin_unlock_irq(&dev_priv->irq_lock); > + if (kthread_should_stop()) > + break; > + > + schedule(); > + } > + } while (1); > + __set_current_state(TASK_RUNNING); > + > + return 0; This looks decidely fishy. irq handler: smp_store_mb(log.signal, 1); wake_up_process(log.tsk); worker: do { set_current_state(TASK_INT); while (cmpxchg(&log.signal, 1, 0)) { reinit_completion(log.complete); i915_guc_capture_logs(); } complete_all(log.complete); if (kthread_should_stop()) break; schedule(); } while(1); __set_current_state(TASK_RUNNING); flush: smp_store_mb(log.signal, 0); wait_for_completion(log.complete); In the end, just the silly locking and placement of complete_all() is dangerous. reinit_completion() lacks the barrier to be used like this really, at any rate, racy with the irq handler, so use sparingly or when you control the irq handler. (Also not sure if log.signal = 0 is sane, or the current callsites really require the flush.) -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx