Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > We cannot allow ourselves to wait on the GPU while holding any lock we s/we/as we? My english parser is not strong. > may need to reset the GPU. While there is not an explicit lock between > the two operations, lockdep cannot detect the dependency. So let's tell > lockdep about the wait/reset dependency with an explicit lockmap. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > This is *annoyingly* good at detecting lock cycles in GPU reset. > -Chris > --- > drivers/gpu/drm/i915/gt/intel_reset.c | 5 ++++- > drivers/gpu/drm/i915/i915_drv.h | 8 ++++++++ > drivers/gpu/drm/i915/i915_gem.c | 3 +++ > drivers/gpu/drm/i915/i915_request.c | 2 ++ > drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 ++ > 5 files changed, 19 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > index 60d24110af80..6368b37f26d1 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -978,10 +978,11 @@ void i915_reset(struct drm_i915_private *i915, > > might_sleep(); > GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); > + lock_map_acquire(&i915->gt.reset_lockmap); > > /* Clear any previous failed attempts at recovery. Time to try again. */ > if (!__i915_gem_unset_wedged(i915)) > - return; > + goto unlock; > > if (reason) > dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); > @@ -1029,6 +1030,8 @@ void i915_reset(struct drm_i915_private *i915, > > finish: > reset_finish(i915); > +unlock: > + lock_map_release(&i915->gt.reset_lockmap); > return; The return patter in this function is rather unorthodox. Might be even that I reviewed it. Very close that I fell into trap of thinking that you return with lock held. > > taint: > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 0ea7f78ae227..9cfa9500fcc4 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1919,6 +1919,14 @@ struct drm_i915_private { > ktime_t last_init_time; > > struct i915_vma *scratch; > + > + /* > + * We must never wait on the GPU while holding a lock we may My english parser still expected 'as' somewhere in there. Regardless, a neat way to use lockdep to enforce our expectations. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > + * need to perform a GPU reset. So while we don't need to > + * serialise wait/reset with an explicit lock, we do want > + * lockdep to detect potential dependency cycles. > + */ > + struct lockdep_map reset_lockmap; > } gt; > > struct { > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index e980c1ee3dcf..24f0f3db1bfb 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1782,6 +1782,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) > > int i915_gem_init_early(struct drm_i915_private *dev_priv) > { > + static struct lock_class_key reset_key; > int err; > > intel_gt_pm_init(dev_priv); > @@ -1789,6 +1790,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) > INIT_LIST_HEAD(&dev_priv->gt.active_rings); > INIT_LIST_HEAD(&dev_priv->gt.closed_vma); > spin_lock_init(&dev_priv->gt.closed_lock); > + lockdep_init_map(&dev_priv->gt.reset_lockmap, > + "i915.reset", &reset_key, 0); > > i915_gem_init__mm(dev_priv); > i915_gem_init__pm(dev_priv); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 1a948471829d..cd4ce44dc4e5 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -1444,6 +1444,7 @@ long i915_request_wait(struct i915_request *rq, > return -ETIME; > > trace_i915_request_wait_begin(rq, flags); > + lock_map_acquire(&rq->i915->gt.reset_lockmap); > > /* > * Optimistic spin before touching IRQs. > @@ -1517,6 +1518,7 @@ long i915_request_wait(struct i915_request *rq, > dma_fence_remove_callback(&rq->fence, &wait.cb); > > out: > + lock_map_release(&rq->i915->gt.reset_lockmap); > trace_i915_request_wait_end(rq); > return timeout; > } > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > index b7f3fbb4ae89..1e9ffced78c1 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > @@ -130,6 +130,7 @@ static struct dev_pm_domain pm_domain = { > > struct drm_i915_private *mock_gem_device(void) > { > + static struct lock_class_key reset_key; > struct drm_i915_private *i915; > struct pci_dev *pdev; > int err; > @@ -204,6 +205,7 @@ struct drm_i915_private *mock_gem_device(void) > INIT_LIST_HEAD(&i915->gt.active_rings); > INIT_LIST_HEAD(&i915->gt.closed_vma); > spin_lock_init(&i915->gt.closed_lock); > + lockdep_init_map(&i915->gt.reset_lockmap, "i915.reset", &reset_key, 0); > > mutex_lock(&i915->drm.struct_mutex); > > -- > 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx