On Wed, 2022-08-17 at 09:55 +0300, Sviatoslav Peleshko wrote: > The i915_gem_object_trylock we had in the grab_vma() makes it return > false > when the vma->obj is already locked. In this case we'll skip this vma > during eviction, and eventually might be forced to return -ENOSPC > even > though we could've evicted this vma if we waited for the lock a bit. > > To fix this, replace the i915_gem_object_trylock with > i915_gem_object_lock. > And because we have to worry about the potential deadlock now, > bubble-up > the error code, so it will be correctly handled by the WW mechanism. > > This fixes the issue > https://gitlab.freedesktop.org/drm/intel/-/issues/6564 > > Fixes: 7e00897be8bf ("drm/i915: Add object locking to > i915_gem_evict_for_node and i915_gem_evict_something, v2.") > Signed-off-by: Sviatoslav Peleshko > <sviatoslav.peleshko@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem_evict.c | 69 ++++++++++++++++++------- > -- > 1 file changed, 46 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c > b/drivers/gpu/drm/i915/i915_gem_evict.c > index f025ee4fa526..9d43f213f68f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -55,49 +55,58 @@ static int ggtt_flush(struct intel_gt *gt) > return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); > } > > -static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx > *ww) > +static int grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx > *ww) > { > + int ret = 0; > + > /* > * We add the extra refcount so the object doesn't drop to > zero until > * after ungrab_vma(), this way trylock is always paired with > unlock. > */ > if (i915_gem_object_get_rcu(vma->obj)) { > - if (!i915_gem_object_trylock(vma->obj, ww)) { > + ret = i915_gem_object_lock(vma->obj, ww); Isn't the vm->mutex held here? If so, then this would be a lockdep violation. /Thomas > + if (ret) > i915_gem_object_put(vma->obj); > - return false; > - } > } else { > /* Dead objects don't need pins */ > atomic_and(~I915_VMA_PIN_MASK, &vma->flags); > } > > - return true; > + return ret; > } > > -static void ungrab_vma(struct i915_vma *vma) > +static void ungrab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx > *ww) > { > if (dying_vma(vma)) > return; > > - i915_gem_object_unlock(vma->obj); > + if (!ww) > + i915_gem_object_unlock(vma->obj); > + > i915_gem_object_put(vma->obj); > } > > -static bool > +static int > mark_free(struct drm_mm_scan *scan, > struct i915_gem_ww_ctx *ww, > struct i915_vma *vma, > unsigned int flags, > struct list_head *unwind) > { > + int err; > + > if (i915_vma_is_pinned(vma)) > - return false; > + return -ENOSPC; > > - if (!grab_vma(vma, ww)) > - return false; > + err = grab_vma(vma, ww); > + if (err) > + return err; > > list_add(&vma->evict_link, unwind); > - return drm_mm_scan_add_block(scan, &vma->node); > + if (!drm_mm_scan_add_block(scan, &vma->node)) > + return -ENOSPC; > + > + return 0; > } > > static bool defer_evict(struct i915_vma *vma) > @@ -150,6 +159,7 @@ i915_gem_evict_something(struct > i915_address_space *vm, > enum drm_mm_insert_mode mode; > struct i915_vma *active; > int ret; > + int err = 0; > > lockdep_assert_held(&vm->mutex); > trace_i915_gem_evict(vm, min_size, alignment, flags); > @@ -210,17 +220,23 @@ i915_gem_evict_something(struct > i915_address_space *vm, > continue; > } > > - if (mark_free(&scan, ww, vma, flags, &eviction_list)) > + err = mark_free(&scan, ww, vma, flags, > &eviction_list); > + if (!err) > goto found; > + if (err == -EDEADLK) > + break; > } > > /* Nothing found, clean up and bail out! */ > list_for_each_entry_safe(vma, next, &eviction_list, > evict_link) { > ret = drm_mm_scan_remove_block(&scan, &vma->node); > BUG_ON(ret); > - ungrab_vma(vma); > + ungrab_vma(vma, ww); > } > > + if (err == -EDEADLK) > + return err; > + > /* > * Can we unpin some objects such as idle hw contents, > * or pending flips? But since only the GGTT has global > entries > @@ -267,7 +283,7 @@ i915_gem_evict_something(struct > i915_address_space *vm, > __i915_vma_pin(vma); > } else { > list_del(&vma->evict_link); > - ungrab_vma(vma); > + ungrab_vma(vma, ww); > } > } > > @@ -277,17 +293,21 @@ i915_gem_evict_something(struct > i915_address_space *vm, > __i915_vma_unpin(vma); > if (ret == 0) > ret = __i915_vma_unbind(vma); > - ungrab_vma(vma); > + ungrab_vma(vma, ww); > } > > while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) { > vma = container_of(node, struct i915_vma, node); > > /* If we find any non-objects (!vma), we cannot evict > them */ > - if (vma->node.color != I915_COLOR_UNEVICTABLE && > - grab_vma(vma, ww)) { > - ret = __i915_vma_unbind(vma); > - ungrab_vma(vma); > + if (vma->node.color != I915_COLOR_UNEVICTABLE) { > + ret = grab_vma(vma, ww); > + if (!ret) { > + ret = __i915_vma_unbind(vma); > + ungrab_vma(vma, ww); > + } else if (ret != -EDEADLK) { > + ret = -ENOSPC; > + } > } else { > ret = -ENOSPC; > } > @@ -382,8 +402,11 @@ int i915_gem_evict_for_node(struct > i915_address_space *vm, > break; > } > > - if (!grab_vma(vma, ww)) { > - ret = -ENOSPC; > + ret = grab_vma(vma, ww); > + if (ret) { > + if (ret != -EDEADLK) > + ret = -ENOSPC; > + > break; > } > > @@ -405,7 +428,7 @@ int i915_gem_evict_for_node(struct > i915_address_space *vm, > if (ret == 0) > ret = __i915_vma_unbind(vma); > > - ungrab_vma(vma); > + ungrab_vma(vma, ww); > } > > return ret;