Re: [PATCH] drm/i915: improve the catch-all evict to handle lock contention

Mani Milani <mani@xxxxxxxxxxxx> · Wed, 14 Dec 2022 14:32:45 +1100

Thank you for the patch.

I briefly tested this patch and it does fix my original problem of
"user-space application crashing due to receiving an -ENOSPC". Once
the code is reviewed, I can test it further and report back.

However, there are a few changes in this patch that change the code
behaviour, which I do not understand. I must admit that I am not very
familiar with this code, but I decided to raise these points anyway, I
hope that is OK.
Please find my comments inline below:

On Wed, Dec 7, 2022 at 3:11 AM Matthew Auld <matthew.auld@xxxxxxxxx> wrote:
>
> The catch-all evict can fail due to object lock contention, since it
> only goes as far as trylocking the object, due to us already holding the
> vm->mutex. Doing a full object lock here can deadlock, since the
> vm->mutex is always our inner lock. Add another execbuf pass which drops
> the vm->mutex and then tries to grab the object will the full lock,
> before then retrying the eviction. This should be good enough for now to
> fix the immediate regression with userspace seeing -ENOSPC from execbuf
> due to contended object locks during GTT eviction.
>
> Testcase: igt@gem_ppgtt@shrink-vs-evict-*
> Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570
> References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558
> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
> Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
> Cc: Andrzej Hajda <andrzej.hajda@xxxxxxxxx>
> Cc: Mani Milani <mani@xxxxxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx> # v5.18+
> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 25 +++++++++++--
>  drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  2 +-
>  drivers/gpu/drm/i915/i915_gem_evict.c         | 37 ++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_evict.h         |  4 +-
>  drivers/gpu/drm/i915/i915_vma.c               |  2 +-
>  .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
>  6 files changed, 56 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 86956b902c97..e2ce1e4e9723 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -745,25 +745,44 @@ static int eb_reserve(struct i915_execbuffer *eb)
>          *
>          * Defragmenting is skipped if all objects are pinned at a fixed location.
>          */
> -       for (pass = 0; pass <= 2; pass++) {
> +       for (pass = 0; pass <= 3; pass++) {
>                 int pin_flags = PIN_USER | PIN_VALIDATE;
>
>                 if (pass == 0)
>                         pin_flags |= PIN_NONBLOCK;
>
>                 if (pass >= 1)
> -                       unpinned = eb_unbind(eb, pass == 2);
> +                       unpinned = eb_unbind(eb, pass >= 2);
>
>                 if (pass == 2) {
>                         err = mutex_lock_interruptible(&eb->context->vm->mutex);
>                         if (!err) {
> -                               err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
> +                               err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
>                                 mutex_unlock(&eb->context->vm->mutex);
>                         }
>                         if (err)
>                                 return err;
>                 }
>
> +               if (pass == 3) {
> +retry:
> +                       err = mutex_lock_interruptible(&eb->context->vm->mutex);
> +                       if (!err) {
> +                               struct drm_i915_gem_object *busy_bo = NULL;
> +
> +                               err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
> +                               mutex_unlock(&eb->context->vm->mutex);
> +                               if (err && busy_bo) {
> +                                       err = i915_gem_object_lock(busy_bo, &eb->ww);
> +                                       i915_gem_object_put(busy_bo);
> +                                       if (!err)
> +                                               goto retry;
Could we possibly get stuck in a never-ending 'retry' loop here?

> +                               }
> +                       }
> +                       if (err)
> +                               return err;
> +               }
> +
>                 list_for_each_entry(ev, &eb->unbound, bind_link) {
>                         err = eb_reserve_vma(eb, ev, pin_flags);
>                         if (err)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index d73ba0f5c4c5..4f69bff63068 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
>                 if (vma == ERR_PTR(-ENOSPC)) {
>                         ret = mutex_lock_interruptible(&ggtt->vm.mutex);
>                         if (!ret) {
> -                               ret = i915_gem_evict_vm(&ggtt->vm, &ww);
> +                               ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
>                                 mutex_unlock(&ggtt->vm.mutex);
>                         }
>                         if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 4cfe36b0366b..c02ebd6900ae 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   * @vm: Address space to cleanse
>   * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
>   * will be able to evict vma's locked by the ww as well.
> + * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
> + * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
> + * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
> + * the vm->mutex, before trying again to acquire the contended lock. The caller
> + * also owns a reference to the object.
>   *
>   * This function evicts all vmas from a vm.
>   *
> @@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   * To clarify: This is for freeing up virtual address space, not for freeing
>   * memory in e.g. the shrinker.
>   */
> -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
> +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
> +                     struct drm_i915_gem_object **busy_bo)
>  {
>         int ret = 0;
>
> @@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
>                          * the resv is shared among multiple objects, we still
>                          * need the object ref.
>                          */
> -                       if (dying_vma(vma) ||
> +                       if (!i915_gem_object_get_rcu(vma->obj) ||
>                             (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
>                                 __i915_vma_pin(vma);
>                                 list_add(&vma->evict_link, &locked_eviction_list);
>                                 continue;
>                         }
>
> -                       if (!i915_gem_object_trylock(vma->obj, ww))
> +                       if (!i915_gem_object_trylock(vma->obj, ww)) {
> +                               if (busy_bo) {
> +                                       *busy_bo = vma->obj; /* holds ref */
> +                                       ret = -EBUSY;
> +                                       break;
> +                               }
> +                               i915_gem_object_put(vma->obj);
If the 'trylock' above fails and 'busy_bo' is NULL, then the code
reaches here twice for every call of this function. This means the
'i915_gem_object_put()' above gets called twice, as opposed to the
previous behaviour where it was never called. I wonder why the change?

>                                 continue;
> +                       }
>
>                         __i915_vma_pin(vma);
>                         list_add(&vma->evict_link, &eviction_list);
> @@ -498,25 +511,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
>                 if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
>                         break;
>
> -               ret = 0;
I don't understand why the line above is removed? This causes the
inconsistencies I have explained in my comments further down.
Whereas if we keep this line, then all the vma's already in
'locked_eviction_list' and 'eviction_list' lists get evicted normally,
and with 'trylock' failing again on the consecutive loop iteration(s)
where the eviction lists are empty, we will return with the correct
-EBUSY code anyway.

>                 /* Unbind locked objects first, before unlocking the eviction_list */
>                 list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
>                         __i915_vma_unpin(vma);
>
> -                       if (ret == 0)
> +                       if (ret == 0) {
>                                 ret = __i915_vma_unbind(vma);
> -                       if (ret != -EINTR) /* "Get me out of here!" */
> -                               ret = 0;
> +                               if (ret != -EINTR) /* "Get me out of here!" */
> +                                       ret = 0;
> +                       }
> +                       if (!dying_vma(vma))
> +                               i915_gem_object_put(vma->obj);
If 'busy_bo' != NULL and the 'trylock' above fails resulting in 'ret'
= -EBUSY, then for vma's prior to that, we end up calling
'i915_gem_object_put()' without calling '__i915_vma_unbind()'.
IIUC, this means we effectively end up calling 'i915_gem_object_put()'
twice for vma's appearing before 'trylock' failure in the list, and
only once for vma's appearing after. This:
1. Does not seem correct!
2. Is different from previous code behaviour.
Why the change?

>                 }
>
>                 list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
>                         __i915_vma_unpin(vma);
> -                       if (ret == 0)
> +                       if (ret == 0) {
>                                 ret = __i915_vma_unbind(vma);
> -                       if (ret != -EINTR) /* "Get me out of here!" */
> -                               ret = 0;
> +                               if (ret != -EINTR) /* "Get me out of here!" */
> +                                       ret = 0;
> +                       }
>
>                         i915_gem_object_unlock(vma->obj);
> +                       i915_gem_object_put(vma->obj);
Same as my previous comment above.

>                 }
>         } while (ret == 0);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
> index e593c530f9bd..bf0ee0e4fe60 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.h
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.h
> @@ -11,6 +11,7 @@
>  struct drm_mm_node;
>  struct i915_address_space;
>  struct i915_gem_ww_ctx;
> +struct drm_i915_gem_object;
>
>  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
>                                           struct i915_gem_ww_ctx *ww,
> @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
>                                          struct drm_mm_node *node,
>                                          unsigned int flags);
>  int i915_gem_evict_vm(struct i915_address_space *vm,
> -                     struct i915_gem_ww_ctx *ww);
> +                     struct i915_gem_ww_ctx *ww,
> +                     struct drm_i915_gem_object **busy_bo);
>
>  #endif /* __I915_GEM_EVICT_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 34f0e6c923c2..7d044888ac33 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1599,7 +1599,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>                          * locked objects when called from execbuf when pinning
>                          * is removed. This would probably regress badly.
>                          */
> -                       i915_gem_evict_vm(vm, NULL);
> +                       i915_gem_evict_vm(vm, NULL, NULL);
>                         mutex_unlock(&vm->mutex);
>                 }
>         } while (1);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 8c6517d29b8e..37068542aafe 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
>
>         /* Everything is pinned, nothing should happen */
>         mutex_lock(&ggtt->vm.mutex);
> -       err = i915_gem_evict_vm(&ggtt->vm, NULL);
> +       err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
>         mutex_unlock(&ggtt->vm.mutex);
>         if (err) {
>                 pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
> @@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
>
>         for_i915_gem_ww(&ww, err, false) {
>                 mutex_lock(&ggtt->vm.mutex);
> -               err = i915_gem_evict_vm(&ggtt->vm, &ww);
> +               err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
>                 mutex_unlock(&ggtt->vm.mutex);
>         }
>
> --
> 2.38.1
>